[
  {
    "path": ".gitattributes",
    "content": "*.pl linguist-language=assembly\n*.h linguist-language=c\n*.tgo linguist-language=go\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: build\n\non:\n  push:\n    branches:\n    - '**'\n  workflow_dispatch:\n    branches:\n    - '**'\n  pull_request:\n    branches:\n    - master\n\njobs:\n  rust-n-go:\n    runs-on: ${{ matrix.os }}\n\n    strategy:\n      matrix:\n        os: [ ubuntu-latest, ubuntu-24.04-arm, windows-latest, windows-11-arm, macos-latest ]\n\n    steps:\n    - uses: actions/checkout@v6\n\n    - name: Get date\n      id: get-date\n      run: echo \"date=$(date -u +%Y-%m)\" >> $GITHUB_OUTPUT\n      shell: bash\n\n    - uses: actions/cache@v5\n      with:\n        path: |\n          ~/.cargo/registry\n          **/Cargo.lock\n          **/bindings/rust/target\n          ~/.wasmtime\n        key: ${{ runner.os }}-${{ runner.arch }}-cargo-${{ steps.get-date.outputs.date }}\n\n    - name: Environment\n      shell: bash\n      run: |\n        lscpu 2>/dev/null && echo --- || true\n        sysctl hw 2>/dev/null && echo --- || true\n        env | sort\n\n    - name: Install Wasmtime\n      if: ${{ runner.os == 'Linux' }}\n      shell: bash\n      run: if [ ! 
-d ~/.wasmtime/bin ]; then curl https://wasmtime.dev/install.sh -sSf | bash; fi\n\n    - name: Rust\n      shell: bash\n      run: |\n        rustc --version --verbose\n        export CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse\n        cd bindings/rust\n        sed \"s/^crit/#crit/\" Cargo.toml > Cargo.$$.toml && \\\n        mv Cargo.$$.toml Cargo.toml\n        if [ \"$GITHUB_EVENT_NAME\" != \"pull_request\" ]; then\n            cargo update\n        fi\n        cargo test --release\n        echo '--- test portable'\n        echo\n        cargo test --release --features=portable\n        echo '--- test no-threads'\n        echo\n        cargo test --release --features=no-threads\n        echo '--- test serde-secret'\n        echo\n        cargo test --release --features=serde-secret\n        echo '--- test no_std'\n        echo\n        echo 'set -e'                                       > ulimit-s\n        echo 'export RUST_MIN_STACK=$(($1 * 1024)); shift'  >> ulimit-s\n        echo 'exec \"$@\"'                                    >> ulimit-s\n        triplet=`rustc -vV | awk '/host:/ {print $2}' | tr 'a-z-' 'A-Z_'`\n        stack_size=`[ $RUNNER_OS = \"Windows\" ] && echo 65 || echo 56`\n        env BLST_TEST_NO_STD= \\\n            CARGO_TARGET_${triplet}_RUNNER=\"bash ulimit-s $stack_size\" \\\n            cargo test --release\n        if [ -x ~/.wasmtime/bin/wasmtime ]; then\n            echo '--- test wasm32-wasip1'\n            echo\n            rustup target add wasm32-wasip1\n            env CARGO_TARGET_WASM32_WASIP1_RUNNER=~/.wasmtime/bin/wasmtime \\\n                cargo test --release --target=wasm32-wasip1\n            cargo clean -p blst --release --target=wasm32-wasip1\n            echo\n        fi\n        if [ $RUNNER_OS = \"Linux\" ]; then\n            if [ `uname -p` = \"x86_64\" ]; then\n                echo '--- test -mlvi-hardening'\n                echo\n                env CC=clang CFLAGS=\"-mlvi-hardening -D__SGX_LVI_HARDENING__\" \\\n    
                cargo test --release\n                echo '--- build x86_64-fortanix-unknown-sgx'\n                echo\n                rustup target add x86_64-fortanix-unknown-sgx\n                cargo test --no-run --release --target=x86_64-fortanix-unknown-sgx\n                cargo clean -p blst --release --target=x86_64-fortanix-unknown-sgx\n                echo\n            fi\n            echo '--- dry-run publish'\n            echo\n            ./publish.sh --dry-run\n        elif [ $RUNNER_OS = \"macOS\" ]; then\n            if [ $RUNNER_ARCH = \"ARM64\" ]; then\n                echo '--- test x86_64-apple-darwin'\n                echo\n                rustup target add x86_64-apple-darwin\n                cargo test --release --target=x86_64-apple-darwin\n                cargo clean -p blst --release --target=x86_64-apple-darwin\n                echo\n            else\n                echo '--- build aarch64-apple-darwin'\n                echo\n                rustup target add aarch64-apple-darwin\n                cargo test --no-run --release --target=aarch64-apple-darwin\n                cargo clean -p blst --release --target=aarch64-apple-darwin\n                echo\n            fi\n            echo '--- build aarch64-apple-ios'\n            echo\n            rustup target add aarch64-apple-ios\n            env IPHONEOS_DEPLOYMENT_TARGET=10.0 \\\n                cargo test --no-run --release --target=aarch64-apple-ios\n            cargo clean -p blst --release --target=aarch64-apple-ios\n            echo\n        elif [ $RUNNER_OS = \"Windows\" -a $RUNNER_ARCH = \"X64\" ]; then\n            if which clang-cl > /dev/null 2>&1; then\n                echo '-- test i686-pc-windows-msvc'\n                echo\n                rustup target add i686-pc-windows-msvc\n                cargo test --release --target=i686-pc-windows-msvc\n                cargo clean -p blst --release --target=i686-pc-windows-msvc\n                echo\n            fi\n       
     echo '-- test x86_64-pc-windows-gnu'\n            echo\n            rustup target add x86_64-pc-windows-gnu\n            cargo test --release --target=x86_64-pc-windows-gnu\n            cargo clean -p blst --release --target=x86_64-pc-windows-gnu\n            echo\n        fi\n        echo\n        echo '--- cargo clippy'\n        echo\n        echo 'msrv = \"1.56\"' > .clippy.toml\n        cargo clippy --release\n        cargo clean -p blst\n        cargo clean -p blst --release\n        rm -rf target/.rustc_info.json\n        rm -rf target/package\n        rm -rf target/{debug,release}/incremental\n        rm -rf target/*/{debug,release}/incremental\n        rm -rf ~/.cargo/registry/src\n        rm -rf ~/.cargo/registry/index/*/.cache\n        mkdir -p ~/.wasmtime\n\n    - name: Go\n      if: ${{ runner.os != 'Windows' || runner.arch != 'ARM64' }}\n      shell: bash\n      run: |\n        go version 2>/dev/null || exit 0\n        if ! (grep -q -e '^flags.*\\badx\\b' /proc/cpuinfo) 2>/dev/null; then\n            export CGO_CFLAGS=\"-O -D__BLST_PORTABLE__\"\n        fi\n        cd bindings/go\n        go test -test.v\n\n  misc-ubuntu-latest:\n    runs-on: ubuntu-latest\n\n    steps:\n    - uses: actions/checkout@v6\n\n    - uses: actions/cache@v5\n      with:\n        path: ~/swig\n        key: ${{ runner.os }}-swig-github\n\n    - uses: actions/setup-java@v5\n      with:\n        distribution: temurin\n        java-version: 11\n\n    - uses: actions/setup-node@v6\n      with:\n        node-version: '20.x'\n\n    - name: Environment\n      run: |\n        lscpu\n        echo ---\n        env | sort\n\n    - name: Python\n      run: if [ -x bindings/python/run.me ]; then bindings/python/run.me; fi\n\n    - name: Java\n      run: if [ -x bindings/java/run.me ]; then bindings/java/run.me; fi\n\n    - name: Node.js\n      run: |\n        node_js=bindings/node.js\n        if [ -x $node_js/run.me ]; then\n            if [ ! 
-x ~/swig/bin/swig ]; then\n              ( git clone --branch v4.3.0 https://github.com/swig/swig;\n                cd swig;\n                ./autogen.sh;\n                ./configure --prefix=$HOME/swig;\n                make;\n                make install;\n                (cd ~/swig/share/swig && ln -s `ls` current)\n              )\n            fi\n            env PATH=~/swig/bin:$PATH SWIG_LIB=~/swig/share/swig/current \\\n                $node_js/run.me\n        fi\n    - name: node-gyp\n      run: |\n        node_js=bindings/node.js\n        if [ -f $node_js/binding.gyp -a -f $node_js/blst_wrap.cpp ]; then\n            npm install --global node-gyp || true\n            if which node-gyp > /dev/null 2>&1; then\n              ( export PATH=~/swig/bin:$PATH SWIG_LIB=~/swig/share/swig/current;\n                cd $node_js;\n                node-gyp configure;\n                node-gyp build;\n                env NODE_PATH=build/Release: node runnable.js;\n              )\n            fi\n        fi\n    - name: TypeScript\n      run: |\n        node_js=bindings/node.js\n        if [ -f $node_js/blst.hpp.ts -a -f $node_js/blst.node ]; then\n            npm install --global typescript || true\n            if which tsc > /dev/null 2>&1; then\n              ( cd $node_js;\n                npm install @types/node;\n                tsc runnable.ts --ignoreConfig --types node --module commonjs;\n                env NODE_PATH=.: node runnable.js;\n              )\n           fi\n        fi\n\n    - name: Emscripten\n      uses: docker://emscripten/emsdk\n      with:\n        args: >\n          bindings/emscripten/run.me -O2\n\n    - name: C#\n      run: |\n        if [ -x bindings/c#/run.me ]; then\n            bindings/c#/run.me;\n            if which dotnet > /dev/null 2>&1; then\n                cd bindings/c#\n                [ -f libblst.dll.so ] || ../../build.sh -dll\n                dotnet run -c Release\n            fi\n        fi\n\n    - uses: 
actions/cache@v5\n      with:\n        path: |\n          ~/.cache/zig\n          ~/zig-x86_64-linux-*\n          ~/.wasmtime\n        key: ${{ runner.os }}-zig-github\n\n    - name: Zig\n      run: |\n        ver=0.15.2\n        base_dir=zig-x86_64-linux-$ver\n        if [ ! -d ~/$base_dir ]; then\n            curl -sSf https://ziglang.org/download/$ver/$base_dir.tar.xz | unxz -c | tar xf - --directory ~\n        fi\n        if [ -x ~/$base_dir/zig ]; then\n            PATH=~/$base_dir:$PATH\n            zig build test --summary new\n            echo '--- test wasm32-wasi'\n            if [ ! -d ~/.wasmtime ]; then\n                curl https://wasmtime.dev/install.sh -sSf | bash\n            fi\n            PATH=~/.wasmtime/bin:$PATH\n            zig build test -Dtarget=wasm32-wasi -fwasmtime --summary new\n        fi\n"
  },
  {
    "path": ".github/workflows/codeql-analysis.yml",
    "content": "name: \"CodeQL\"\n\non:\n  push:\n    branches:\n      - '**'\n    paths:\n      - 'src/*'\n      - 'bindings/c#/*'\n      - '.github/workflows/codeql-analysis.yml'\n  pull_request:\n    branches:\n      - master\n    paths:\n      - 'src/*'\n      - 'bindings/c#/*'\n  #schedule:\n  #  - cron: '0 23+ * * 4'\n\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n\n    permissions:\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: [ 'cpp', 'csharp' ]\n\n    steps:\n    - name: Checkout repository\n      uses: actions/checkout@v6\n      with:\n        # We must fetch at least the immediate parents so that if this is\n        # a pull request then we can checkout the head.\n        fetch-depth: 2\n\n    # Initializes the CodeQL tools for scanning.\n    - name: Initialize CodeQL\n      uses: github/codeql-action/init@v4\n      with:\n        languages: ${{ matrix.language }}\n        queries: security-extended\n\n    - if: matrix.language == 'cpp'\n      name: Custom build\n      run: ./build.sh -m32 -ffreestanding\n\n    - if: matrix.language != 'cpp'\n      name: Autobuild\n      uses: github/codeql-action/autobuild@v4\n\n    - name: Perform CodeQL Analysis\n      uses: github/codeql-action/analyze@v4\n\n"
  },
  {
    "path": ".github/workflows/golang-lint.yml",
    "content": "name: golang-lint\n\non:\n  push:\n    branches:\n      - '**'\n    paths:\n      - 'bindings/go/*.go'\n      - '.github/workflows/golang-lint.yml'\n      - '.golangci.yml'\n  pull_request:\n    branches:\n      - master\n    paths:\n      - 'bindings/go/*.go'\n\njobs:\n  golang-lint:\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n      - uses: actions/setup-go@v6\n        with:\n          go-version: '>=1.21'\n          cache: false\n      - name: \"go version\"\n        run: go version\n      - uses: golangci/golangci-lint-action@v9\n        with:\n          # Require: The version of golangci-lint to use.\n          # When `install-mode` is `binary` (default) the value can be v1.2 or v1.2.3 or `latest` to use the latest version.\n          # When `install-mode` is `goinstall` the value can be v1.2.3, `latest`, or the hash of a commit.\n          version: v2.9\n\n          # Optional: working directory, useful for monorepos\n          # working-directory: somedir\n\n          # Optional: golangci-lint command line arguments.\n          #\n          # Note: By default, the `.golangci.yml` file should be at the root of the repository.\n          # The location of the configuration file can be changed by using `--config=`\n          # args: --timeout=30m --config=/my/path/.golangci.yml --issues-exit-code=0 \n\n          # Optional: show only new issues if it's a pull request. 
The default value is `false`.\n          # only-new-issues: true\n\n          # Optional: if set to true, then all caching functionality will be completely disabled,\n          #           takes precedence over all other caching options.\n          skip-cache: true\n\n          # Optional: if set to true, then the action won't cache or restore ~/go/pkg.\n          # skip-pkg-cache: true\n\n          # Optional: if set to true, then the action won't cache or restore ~/.cache/go-build.\n          # skip-build-cache: true\n\n          # Optional: The mode to install golangci-lint. It can be 'binary' or 'goinstall'.\n          # install-mode: \"goinstall\"\n"
  },
  {
    "path": ".gitignore",
    "content": "# Prerequisites\n*.d\n\n# Object files\n*.o\n*.ko\n*.obj\n*.elf\n\n# Linker output\n*.ilk\n*.map\n*.exp\n\n# Precompiled Headers\n*.gch\n*.pch\n\n# Libraries\n*.lib\n*.a\n*.la\n*.lo\n\n# Shared objects (inc. Windows DLLs)\n*.dll\n*.so\n*.so.*\n*.dylib\n\n# Executables\n*.exe\n*.out\n*.app\n*.i*86\n*.x86_64\n*.hex\n\n# Debug files\n*.dSYM/\n*.su\n*.idb\n*.pdb\n\n# Kernel Module Compile Results\n*.mod*\n*.cmd\n.tmp_versions/\nmodules.order\nModule.symvers\nMkfile.old\ndkms.conf\n\n# Open swap files\n*.swp\n\n# Emacs backup files\n*~\n\n# Rust build \nCargo.lock\nbindings/rust/target\nbindings/rust/blst\n\n# These are customarily filled with swig artefacts\nbindings/python\nbindings/java\nbindings/node.js\n\nbindings/emscripten\nbin/\nobj/\n\nzig-out\n.zig-cache\n"
  },
  {
    "path": ".golangci.yml",
    "content": "version: \"2\"\nlinters:\n  default: all\n  disable:\n    # just whining\n    - copyloopvar                       # go>=1.22\n    - cyclop\n    - dupword\n    - forbidigo\n    - funlen\n    - gochecknoglobals\n    - gochecknoinits\n    - gocognit\n    - gocritic\n    - gocyclo\n    - godot\n    - intrange                          # go>=1.22\n    - lll\n    - mnd\n    - nestif\n    - nlreturn\n    - varnamelen\n    - whitespace\n    - wsl\n    - wsl_v5\n    # auto-generation artefact\n    - dupl\n    # maybe some day...\n    - godoclint\n    - godox\n    - maintidx\n    # maybe some day in tests...\n    - forcetypeassert\n    - nonamedreturns\n    - perfsprint\n    - testpackage\n    # 83 active linters remaining including gosec, gosimple, govet, etc.\n  settings:\n    revive:\n      enable-all-rules: true\n      rules:\n        - name: add-constant\n          disabled: true\n        - name: argument-limit\n          disabled: true\n        - name: cognitive-complexity    # similar to 'gocognit' above\n          disabled: true\n        - name: cyclomatic              # similar to 'cyclop' & 'gocyclo' above\n          disabled: true\n        - name: empty-block\n          disabled: true\n        - name: empty-lines\n          disabled: true\n        - name: flag-parameter\n          disabled: true\n        - name: function-length         # similar to 'funlen' above\n          disabled: true\n        - name: function-result-limit\n          disabled: true\n        - name: increment-decrement\n          disabled: true\n        - name: line-length-limit       # similar to 'lll' above\n          disabled: true\n        - name: max-public-structs\n          disabled: true\n        - name: package-directory-mismatch\n          disabled: true\n        - name: receiver-naming\n          disabled: true\n        - name: var-naming\n          disabled: true\n        - name: unchecked-type-assertion # similar to 'forcetypeassert' above\n          disabled: true\n 
       - name: unexported-naming\n          disabled: true\n        - name: unhandled-error\n          arguments:\n            - fmt.Println\n            - fmt.Printf\n        - name: use-any                 # applicable to go>=1.18 only\n          disabled: true\n  exclusions:\n    generated: lax\n    presets:\n      - comments\n      - common-false-positives\n      - legacy\n      - std-error-handling\n    paths:\n      - third_party$\n      - builtin$\n      - examples$\nformatters:\n  exclusions:\n    generated: lax\n    paths:\n      - third_party$\n      - builtin$\n      - examples$\n"
  },
  {
    "path": ".lgtm.yml",
    "content": "queries:\n  - include: \"*\"\n  - exclude: cpp/unused-static-function\n  - exclude: cpp/include-non-header\n  - exclude: cs/call-to-unmanaged-code\n  - exclude: cs/unmanaged-code\n\nextraction:\n  cpp:\n    index:\n      build_command:\n      - ./build.sh -m32\n  go:\n    index:\n      build_command:\n      - (cd bindings/go; go test -c)\n\n  csharp:\n    index:\n      nuget_restore: false\n\n"
  },
  {
    "path": ".travis.yml",
    "content": "branches:\n    only:\n      - /.*/\n\nlanguage: rust\n\ngit:\n    quiet: true\n\nos:\n    - linux\n\narch:\n    - arm64\n    - s390x\n\nbefore_script:\n    - lscpu 2>/dev/null && echo --- || true\n    - env | sort\n\nscript:\n    - if [ \"$TRAVIS_LANGUAGE\" = \"rust\" ]; then\n          if [ \"$TRAVIS_OS_NAME\" = \"windows\" ]; then\n              rustup set default-host x86_64-pc-windows-msvc;\n              export ML=-nologo;\n          fi;\n          ( cd bindings/rust;\n            if [ -f target/Cargo.lock ]; then\n                mv -f target/Cargo.lock .;\n            fi;\n            NOW=`date +%s`;\n            REF=.cargo/registry/index/*/.last-updated;\n            THEN=`(stat -c %Y \"$TRAVIS_HOME\"/$REF || stat -f %m \"$TRAVIS_HOME\"/$REF) 2>/dev/null`;\n            if [ $(($NOW - ${THEN:-0})) -gt 604800 ]; then\n                env CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse cargo update;\n            fi;\n            cargo test --release )\n      fi\n    - if which go > /dev/null 2>&1; then\n          go version;\n          if ! (grep -q -e '^flags.*\\badx\\b' /proc/cpuinfo) 2>/dev/null; then\n              export CGO_CFLAGS=\"-O -D__BLST_PORTABLE__\";\n          fi;\n          (cd bindings/go; go test -test.v)\n      fi\n\nmatrix:\n    include:\n      - os: linux\n        arch: arm64\n        language: go\n\nnotifications:\n    email: false\n\nbefore_cache:\n    - if [ \"$TRAVIS_LANGUAGE\" = \"rust\" ]; then\n        ( cd bindings/rust;\n          cargo clean -p blst; cargo clean -p blst --release;\n          rm -rf target/.rustc_info.json;\n          rm -rf target/{debug,release}/incremental;\n          mv -f Cargo.lock target )\n      fi\n    - (cd \"$TRAVIS_HOME\"; rm -rf .cargo/registry/src)\n    - (cd \"$TRAVIS_HOME\"; rm -rf .cargo/registry/index/*/.cache)\n\ncache:\n    cargo: true\n    directories:\n      - bindings/rust/target\n\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "[![Actions status](https://github.com/supranational/blst/workflows/build/badge.svg)](https://github.com/supranational/blst/actions) [![CodeQL status](https://github.com/supranational/blst/workflows/CodeQL/badge.svg)](https://github.com/supranational/blst/actions/workflows/codeql-analysis.yml)\n<div align=\"left\">\n  <img src=blst_logo_small.png>\n</div>\n\n# blst\nblst (pronounced 'blast') is a BLS12-381 signature library focused on performance and security. It is written in C and assembly.\n\n## Table of Contents\n\n  * [Status](#status)\n  * [General notes on implementation](#general-notes-on-implementation)\n  * [Platform and Language Compatibility](#platform-and-language-compatibility)\n  * [API](#api)\n  * [Introductory Tutorial](#introductory-tutorial)\n    + [Public Keys and Signatures](#public-keys-and-signatures)\n    + [Signature Verification](#signature-verification)\n    + [Signature Aggregation](#signature-aggregation)\n    + [Serialization Format](#serialization-format)\n  * [Build](#build)\n    + [C static library](#c-static-library)\n  * [Language-specific notes](#language-specific-notes)\n    + [Go](#go)\n    + [Rust](#rust)\n  * [Repository Structure](#repository-structure)\n  * [Performance](#performance)\n  * [License](#license)\n\n## Status\n**This library is under active development**\n\nAn initial audit of this library was conducted by NCC Group in January 2021 and can be found [here](https://research.nccgroup.com/wp-content/uploads/2021/01/NCC_Group_EthereumFoundation_ETHF002_Report_2021-01-20_v1.0.pdf).\n\nFormal verification of this library by Galois is on-going and can be found [here](https://github.com/GaloisInc/BLST-Verification).\n\nThis library is compliant with the following IETF draft specifications:\n- [IETF BLS Signature V6](https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature)\n- [IETF RFC 9380 Hashing to Elliptic Curves](https://www.rfc-editor.org/rfc/rfc9380.html)\n\nThe serialization formatting is 
implemented according to [the ZCash definition](#serialization-format).\n\n## General notes on implementation\nThe goal of the blst library is to provide a foundational component for applications and other libraries that require high performance and formally verified BLS12-381 operations. With that in mind some decisions are made to maximize the public good beyond BLS12-381. For example, the field operations are optimized for general 384-bit usage, as opposed to being tuned specifically for the 381-bit BLS12-381 curve parameters. With the formal verification of these foundational components, we believe they can provide a reliable building block for other curves that would like high performance and an extra element of security.\n\nThe library deliberately abstains from dealing with memory management and multi-threading, with the rationale that these ultimately belong in language-specific bindings. Another responsibility that is left to the application is random number generation. All this in the name of run-time neutrality, which makes integration into more stringent environments like Intel SGX or ARM TrustZone trivial.\n\n## Platform and Language Compatibility\n\nThis library primarily supports x86_64 and ARM64 hardware platforms, and Linux, Mac, and Windows operating systems. But it does have a portable replacement for the assembly modules, which can be compiled for a plethora of other platforms. Problem reports for these will be considered and are likely to be addressed.\n\nThis repository includes explicit bindings for:\n- [Go](bindings/go)\n- [Rust](bindings/rust)\n\nUnless deemed appropriate to implement, bindings for other languages will be provided using [SWIG](http://swig.org). 
Proof-of-concept scripts are available for:\n- [Python](bindings/python)\n- [Java](bindings/java)\n- [Node.js](bindings/node.js)\n- [Emscripten](bindings/emscripten)\n- [C#](bindings/c%23)\n- [Zig](bindings/zig)\n\n## API\n\nThe blst API is defined in the C header [bindings/blst.h](bindings/blst.h). The API can be categorized as follows, with some example operations:\n- Field Operations (add, sub, mul, neg, inv, to/from Montgomery)\n- Curve Operations (add, double, mul, to/from affine, group check)\n- Intermediate (hash to curve, pairing, serdes)\n- BLS12-381 signature (sign, verify, aggregate)\n\nNote: there is also an auxiliary header file, [bindings/blst_aux.h](bindings/blst_aux.h), that is used as a staging area for experimental interfaces that may or may not get promoted to blst.h.\n\n## Introductory Tutorial\n\nProgramming is understanding, and understanding implies mastering the lingo. So we have a pair of additive groups being mapped to multiplicative one... What does it mean? Well, this tutorial is not about explaining that, but rather about making the connection between what you're supposed to know about [pairing-based cryptography](https://en.wikipedia.org/wiki/Pairing-based_cryptography) and the interface provided by the library.\n\n### Public Keys and Signatures\n\nWe have two elliptic curves, E1 and E2, points on which are contained in `blst_p1` and `blst_p2`, or `blst_p1_affine` and `blst_p2_affine` structures. Elements in the multiplicative group are held in a `blst_fp12` structure. One of the curves, or more specifically, a subset of points that form a cyclic group, is chosen for public keys, and another, for signatures. The choice is denoted by the subroutines' suffixes, `_pk_in_g1` or `_pk_in_g2`. The most common choice appears to be the former, that is, `blst_p1` for public keys, and `blst_p2` for signatures. 
But it all starts with a secret key...\n\nThe secret key is held in a 256-bit `blst_scalar` structure which can be instantiated with either [`blst_keygen`](https://tools.ietf.org/html/draft-irtf-cfrg-bls-signature#section-2.3), or deserialized with `blst_scalar_from_bendian` or `blst_scalar_from_lendian` from a previously serialized byte sequence. It shouldn't come as a surprise that there are two uses for a secret key:\n\n- generating the associated public key, either with `blst_sk_to_pk_in_g1` or `blst_sk_to_pk_in_g2`;\n- performing a sign operation, either with `blst_sign_pk_in_g1` or `blst_sign_pk_in_g2`;\n\nAs for signing, unlike what your intuition might suggest, `blst_sign_*` doesn't sign a message, but rather a point on the corresponding elliptic curve. You can obtain this point from a message by calling `blst_hash_to_g2` or `blst_encode_to_g2` (see the [IETF hash-to-curve](https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve#section-3) draft for the distinction). Another counter-intuitive aspect is the apparent g1 vs. g2 naming mismatch, in the sense that `blst_sign_pk_in_g1` accepts output from `blst_hash_to_g2`, and `blst_sign_pk_in_g2` accepts output from `blst_hash_to_g1`. This is because, as you should recall, public keys and signatures come from complementary groups.\n\nNow that you have a public key and signature, as points on corresponding elliptic curves, you can serialize them with `blst_p1_serialize`/`blst_p1_compress` and `blst_p2_serialize`/`blst_p2_compress` and send the resulting byte sequences over the network for deserialization/uncompression and verification.\n\n### Signature Verification\n\nEven though there are \"single-shot\" `blst_core_verify_pk_in_g1` and `blst_core_verify_pk_in_g2`, you should really familiarize yourself with the more generalized pairing interface. `blst_pairing` is an opaque structure, and the only thing you know about it is `blst_pairing_sizeof`, which is how much memory you're supposed to allocate for it. 
In order to verify an aggregated signature for a set of public keys and messages, or just one[!], you would:\n```\nblst_pairing_init(ctx, hash_or_encode, domain_separation_tag);\nblst_pairing_aggregate_pk_in_g1(ctx, PK[0], aggregated_signature, message[0]);\nblst_pairing_aggregate_pk_in_g1(ctx, PK[1], NULL, message[1]);\n...\nblst_pairing_commit(ctx);\nresult = blst_pairing_finalverify(ctx, NULL);\n```\n**The essential point to note** is that it's the caller's responsibility to ensure that public keys are group-checked with `blst_p1_affine_in_g1`. This is because it's a relatively expensive operation and it's naturally assumed that the application would cache the check's outcome. Signatures are group-checked internally. Not shown in the pseudo-code snippet above, but `aggregate` and `commit` calls return `BLST_ERROR` denoting success or failure in performing the operation. Call to `finalverify`, on the other hand, returns boolean.\n\nAnother, potentially more useful usage pattern is:\n```\nblst_p2_affine_in_g2(signature);\nblst_aggregated_in_g2(gtsig, signature);\nblst_pairing_init(ctx, hash_or_encode, domain_separation_tag);\nblst_pairing_aggregate_pk_in_g1(ctx, PK[0], NULL, message[0]);\nblst_pairing_aggregate_pk_in_g1(ctx, PK[1], NULL, message[1]);\n...\nblst_pairing_commit(ctx);\nresult = blst_pairing_finalverify(ctx, gtsig);\n```\nWhat is useful about it is that `aggregated_signature` can be handled in a separate thread. And while we are at it, aggregate calls can also be executed in different threads. This naturally implies that each thread will operate on its own `blst_pairing` context, which will have to be combined with `blst_pairing_merge` as threads join.\n\n### Signature Aggregation\n\nAggregation is a trivial operation of performing point additions, with `blst_p2_add_or_double_affine` or `blst_p1_add_or_double_affine`. 
Note that the accumulator is a non-affine point.\n\n---\n\nThat's about what you need to know to get started with the nitty-gritty of the actual function declarations.\n\n### Serialization Format\n\nFrom the ZCash BLS12-381 specification:\n\n* Fq elements are encoded in big-endian form. They occupy 48 bytes in this form.\n* Fq2 elements are encoded in big-endian form, meaning that the Fq2 element c0 + c1 * u is represented by the Fq element c1 followed by the Fq element c0. This means Fq2 elements occupy 96 bytes in this form.\n* The group G1 uses Fq elements for coordinates. The group G2 uses Fq2 elements for coordinates.\n* G1 and G2 elements can be encoded in uncompressed form (the x-coordinate followed by the y-coordinate) or in compressed form (just the x-coordinate). G1 elements occupy 96 bytes in uncompressed form, and 48 bytes in compressed form. G2 elements occupy 192 bytes in uncompressed form, and 96 bytes in compressed form.\n\nThe most-significant three bits of a G1 or G2 encoding should be masked away before the coordinate(s) are interpreted. These bits are used to unambiguously represent the underlying element:\n\n* The most significant bit, when set, indicates that the point is in compressed form. Otherwise, the point is in uncompressed form.\n* The second-most significant bit indicates that the point is at infinity. If this bit is set, the remaining bits of the group element's encoding should be set to zero.\n* The third-most significant bit is set if (and only if) this point is in compressed form _and_ it is not the point at infinity _and_ its y-coordinate is the lexicographically largest of the two associated with the encoded x-coordinate.\n\n## Build\nThe build process is very simple and only requires a C compiler. It's integrated into the Go and Rust ecosystems, so that respective users would go about as they would with any other external module. 
Otherwise, a binary library would have to be compiled.\n\n### C static library\nA static library called libblst.a can be built in the current working directory of the user's choice:\n\nLinux, Mac, and Windows (in MinGW or Cygwin environments)\n```\n/some/where/build.sh\n```\n\nWindows (Visual C)\n```\n\\some\\where\\build.bat\n```\n\nIf the final application crashes with an \"illegal instruction\" exception [after copying to another system], pass <nobr>`-D__BLST_PORTABLE__`</nobr> on the `build.sh` command line. If you don't use build.sh, complement the `CFLAGS` environment variable with the said command line option. If you compile a Go application, you will need to modify the `CGO_CFLAGS` variable instead. And if you compile a Rust application, you can pass <nobr>`--features portable`</nobr> to `cargo build`. Alternatively, if you compile on an older Intel system, but will execute the application on a newer one, consider instead passing <nobr>`--features force-adx`</nobr> for better performance.\n\n## Language-specific notes\n\n### [Go](bindings/go)\nThere are two primary modes of operation that can be chosen based on type definitions in the application.\n\nFor minimal-pubkey-size operations:\n```\ntype PublicKey = blst.P1Affine\ntype Signature = blst.P2Affine\ntype AggregateSignature = blst.P2Aggregate\ntype AggregatePublicKey = blst.P1Aggregate\n```\n\nFor minimal-signature-size operations:\n```\ntype PublicKey = blst.P2Affine\ntype Signature = blst.P1Affine\ntype AggregateSignature = blst.P1Aggregate\ntype AggregatePublicKey = blst.P2Aggregate\n```\n\nFor more details see the Go binding [readme](bindings/go/README.md).\n\n### [Rust](bindings/rust)\n[`blst`](https://crates.io/crates/blst) is the Rust binding crate.\n\nTo use min-pk version:\n```\nuse blst::min_pk::*;\n```\n\nTo use min-sig version:\n```\nuse blst::min_sig::*;\n```\n\nFor more details see the Rust binding [readme](bindings/rust/README.md).\n\n## Repository Structure\n\n**Root** - Contains various 
configuration files, documentation, licensing, and a build script\n* **Bindings** - Contains the files that define the blst interface\n    * blst.h - provides C API to blst library\n    * blst_aux.h - contains experimental functions not yet committed for long-term maintenance\n    * blst.hpp - provides foundational class-oriented C++ interface to blst library\n    * blst.swg - provides SWIG definitions for creating blst bindings for other languages, such as Java and Python\n    * **C#** - folder containing C# bindings and an example of how to use them\n    * **Emscripten**  - folder containing an example of how to use Emscripten WebAssembly bindings from Javascript\n    * **Go** - folder containing Go bindings for blst, including tests and benchmarks\n    * **Java** - folder containing an example of how to use SWIG Java bindings for blst\n    * **Node.js** - folder containing an example of how to use SWIG Javascript bindings for blst\n    * **Python** - folder containing an example of how to use SWIG Python bindings for blst\n    * **Rust** - folder containing Rust bindings for blst, including tests and benchmarks\n    * **Vectors**\n        * **Hash_to_curve**: folder containing tests for hash_to_curve from the IETF specification\n* **Src** - folder containing C code for lower level blst functions such as field operations, extension field operations, hash-to-field, and more\n    * **Asm** - folder containing Perl scripts that are used to generate assembly code for different hardware platforms including x86 with ADX instructions, x86 without ADX instructions, and ARMv8, and [ABI](https://en.wikipedia.org/wiki/Application_binary_interface)[1]\n* **Build** - folder containing a set of pre-generated assembly files for a variety of operating systems and maintenance scripts.\n    * **Cheri** - assembly code for use on [CHERI](https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/) platforms\n    * **Coff** - assembly code for use on Windows systems with GNU or LLVM 
toolchain\n    * **Elf** - assembly code for use on Unix systems\n    * **Mach-o** - assembly code for use on Apple operating systems\n    * **Win64** - assembly code for use on Windows systems with Microsoft toolchain\n\n[1]: See [refresh.sh](build/refresh.sh) for usage. This method allows for simple reuse of optimized assembly across various platforms with minimal effort.\n\n## Performance\nCurrently, both the [Go](bindings/go) and [Rust](bindings/rust) bindings provide benchmarks for a variety of signature-related operations.\n\n## License\nThe blst library is licensed under the [Apache License Version 2.0](LICENSE) software license.\n"
  },
  {
    "path": "SECURITY.md",
    "content": "# Security Policy\n\n## Reporting a Vulnerability\n\nTo report security issues please send an e-mail to hello@supranational.net. \n\nFor sensitive information or critical issues, please contact the above e-mail address with 'CRITICAL' in the subject line and we will respond with a mechanism to securely communicate.\n\nPlease try to provide a clear description of any issue reported, along with how to reproduce the issue if possible.\n"
  },
  {
    "path": "bindings/blst.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLST_H__\n#define __BLST_H__\n\n#ifdef __SIZE_TYPE__\ntypedef __SIZE_TYPE__ size_t;\n#else\n#include <stddef.h>\n#endif\n\n#if defined(__UINT8_TYPE__) && defined(__UINT32_TYPE__) \\\n                            && defined(__UINT64_TYPE__)\ntypedef __UINT8_TYPE__  uint8_t;\ntypedef __UINT32_TYPE__ uint32_t;\ntypedef __UINT64_TYPE__ uint64_t;\n#else\n#include <stdint.h>\n#endif\n\n#ifdef __cplusplus\nextern \"C\" {\n#elif !defined(__STDC_VERSION__) || __STDC_VERSION__<202311\n# if defined(__BLST_CGO__)\ntypedef _Bool bool; /* it's assumed that cgo calls modern enough compiler */\n# elif defined(__BLST_RUST_BINDGEN__) || defined(__BLST_ZIG__)\n#  define bool _Bool\n# elif defined(__STDC_VERSION__) && __STDC_VERSION__>=199901\n#  include <stdbool.h>\n# elif !defined(bool)\n#  define bool int\n#  define __blst_h_bool__\n# endif\n#endif\n\n#ifdef SWIG\n# define DEFNULL =NULL\n#elif defined __cplusplus\n# define DEFNULL =0\n#else\n# define DEFNULL\n#endif\n\ntypedef enum {\n    BLST_SUCCESS = 0,\n    BLST_BAD_ENCODING,\n    BLST_POINT_NOT_ON_CURVE,\n    BLST_POINT_NOT_IN_GROUP,\n    BLST_AGGR_TYPE_MISMATCH,\n    BLST_VERIFY_FAIL,\n    BLST_PK_IS_INFINITY,\n    BLST_BAD_SCALAR,\n} BLST_ERROR;\n\ntypedef uint8_t byte;\ntypedef uint64_t limb_t;\n\ntypedef struct { byte b[256/8]; } blst_scalar;\ntypedef struct { limb_t l[256/8/sizeof(limb_t)]; } blst_fr;\ntypedef struct { limb_t l[384/8/sizeof(limb_t)]; } blst_fp;\n/* 0 is \"real\" part, 1 is \"imaginary\" */\ntypedef struct { blst_fp fp[2]; } blst_fp2;\ntypedef struct { blst_fp2 fp2[3]; } blst_fp6;\ntypedef struct { blst_fp6 fp6[2]; } blst_fp12;\n\nvoid blst_scalar_from_uint32(blst_scalar *out, const uint32_t a[8]);\nvoid blst_uint32_from_scalar(uint32_t out[8], const blst_scalar *a);\nvoid blst_scalar_from_uint64(blst_scalar *out, const 
uint64_t a[4]);\nvoid blst_uint64_from_scalar(uint64_t out[4], const blst_scalar *a);\nvoid blst_scalar_from_bendian(blst_scalar *out, const byte a[32]);\nvoid blst_bendian_from_scalar(byte out[32], const blst_scalar *a);\nvoid blst_scalar_from_lendian(blst_scalar *out, const byte a[32]);\nvoid blst_lendian_from_scalar(byte out[32], const blst_scalar *a);\nbool blst_scalar_fr_check(const blst_scalar *a);\nbool blst_sk_check(const blst_scalar *a);\nbool blst_sk_add_n_check(blst_scalar *out, const blst_scalar *a,\n                                           const blst_scalar *b);\nbool blst_sk_sub_n_check(blst_scalar *out, const blst_scalar *a,\n                                           const blst_scalar *b);\nbool blst_sk_mul_n_check(blst_scalar *out, const blst_scalar *a,\n                                           const blst_scalar *b);\nvoid blst_sk_inverse(blst_scalar *out, const blst_scalar *a);\nbool blst_scalar_from_le_bytes(blst_scalar *out, const byte *in, size_t len);\nbool blst_scalar_from_be_bytes(blst_scalar *out, const byte *in, size_t len);\n\n#ifndef SWIG\n/*\n * BLS12-381-specific Fr operations.\n */\nvoid blst_fr_add(blst_fr *ret, const blst_fr *a, const blst_fr *b);\nvoid blst_fr_sub(blst_fr *ret, const blst_fr *a, const blst_fr *b);\nvoid blst_fr_mul_by_3(blst_fr *ret, const blst_fr *a);\nvoid blst_fr_lshift(blst_fr *ret, const blst_fr *a, size_t count);\nvoid blst_fr_rshift(blst_fr *ret, const blst_fr *a, size_t count);\nvoid blst_fr_mul(blst_fr *ret, const blst_fr *a, const blst_fr *b);\nvoid blst_fr_sqr(blst_fr *ret, const blst_fr *a);\nvoid blst_fr_cneg(blst_fr *ret, const blst_fr *a, bool flag);\nvoid blst_fr_eucl_inverse(blst_fr *ret, const blst_fr *a);\nvoid blst_fr_inverse(blst_fr *ret, const blst_fr *a);\n\nvoid blst_fr_from_uint64(blst_fr *ret, const uint64_t a[4]);\nvoid blst_uint64_from_fr(uint64_t ret[4], const blst_fr *a);\nvoid blst_fr_from_scalar(blst_fr *ret, const blst_scalar *a);\nvoid blst_scalar_from_fr(blst_scalar *ret, 
const blst_fr *a);\n\n/*\n * BLS12-381-specific Fp operations.\n */\nvoid blst_fp_add(blst_fp *ret, const blst_fp *a, const blst_fp *b);\nvoid blst_fp_sub(blst_fp *ret, const blst_fp *a, const blst_fp *b);\nvoid blst_fp_mul_by_3(blst_fp *ret, const blst_fp *a);\nvoid blst_fp_mul_by_8(blst_fp *ret, const blst_fp *a);\nvoid blst_fp_lshift(blst_fp *ret, const blst_fp *a, size_t count);\nvoid blst_fp_mul(blst_fp *ret, const blst_fp *a, const blst_fp *b);\nvoid blst_fp_sqr(blst_fp *ret, const blst_fp *a);\nvoid blst_fp_cneg(blst_fp *ret, const blst_fp *a, bool flag);\nvoid blst_fp_eucl_inverse(blst_fp *ret, const blst_fp *a);\nvoid blst_fp_inverse(blst_fp *ret, const blst_fp *a);\nbool blst_fp_sqrt(blst_fp *ret, const blst_fp *a);\n\nvoid blst_fp_from_uint32(blst_fp *ret, const uint32_t a[12]);\nvoid blst_uint32_from_fp(uint32_t ret[12], const blst_fp *a);\nvoid blst_fp_from_uint64(blst_fp *ret, const uint64_t a[6]);\nvoid blst_uint64_from_fp(uint64_t ret[6], const blst_fp *a);\nvoid blst_fp_from_bendian(blst_fp *ret, const byte a[48]);\nvoid blst_bendian_from_fp(byte ret[48], const blst_fp *a);\nvoid blst_fp_from_lendian(blst_fp *ret, const byte a[48]);\nvoid blst_lendian_from_fp(byte ret[48], const blst_fp *a);\n\n/*\n * BLS12-381-specific Fp2 operations.\n */\nvoid blst_fp2_add(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b);\nvoid blst_fp2_sub(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b);\nvoid blst_fp2_mul_by_3(blst_fp2 *ret, const blst_fp2 *a);\nvoid blst_fp2_mul_by_8(blst_fp2 *ret, const blst_fp2 *a);\nvoid blst_fp2_lshift(blst_fp2 *ret, const blst_fp2 *a, size_t count);\nvoid blst_fp2_mul(blst_fp2 *ret, const blst_fp2 *a, const blst_fp2 *b);\nvoid blst_fp2_sqr(blst_fp2 *ret, const blst_fp2 *a);\nvoid blst_fp2_cneg(blst_fp2 *ret, const blst_fp2 *a, bool flag);\nvoid blst_fp2_eucl_inverse(blst_fp2 *ret, const blst_fp2 *a);\nvoid blst_fp2_inverse(blst_fp2 *ret, const blst_fp2 *a);\nbool blst_fp2_sqrt(blst_fp2 *ret, const blst_fp2 *a);\n\n/*\n * 
BLS12-381-specific Fp12 operations.\n */\nvoid blst_fp12_sqr(blst_fp12 *ret, const blst_fp12 *a);\nvoid blst_fp12_cyclotomic_sqr(blst_fp12 *ret, const blst_fp12 *a);\nvoid blst_fp12_mul(blst_fp12 *ret, const blst_fp12 *a, const blst_fp12 *b);\nvoid blst_fp12_mul_by_xy00z0(blst_fp12 *ret, const blst_fp12 *a,\n                                             const blst_fp6 *xy00z0);\nvoid blst_fp12_conjugate(blst_fp12 *a);\nvoid blst_fp12_inverse(blst_fp12 *ret, const blst_fp12 *a);\n/* caveat lector! |n| has to be non-zero and not more than 3! */\nvoid blst_fp12_frobenius_map(blst_fp12 *ret, const blst_fp12 *a, size_t n);\nbool blst_fp12_is_equal(const blst_fp12 *a, const blst_fp12 *b);\nbool blst_fp12_is_one(const blst_fp12 *a);\nbool blst_fp12_in_group(const blst_fp12 *a);\nconst blst_fp12 *blst_fp12_one(void);\n#endif  // SWIG\n\n/*\n * BLS12-381-specific point operations.\n */\ntypedef struct { blst_fp x, y, z; } blst_p1;\ntypedef struct { blst_fp x, y; } blst_p1_affine;\n\nvoid blst_p1_add(blst_p1 *out, const blst_p1 *a, const blst_p1 *b);\nvoid blst_p1_add_or_double(blst_p1 *out, const blst_p1 *a, const blst_p1 *b);\nvoid blst_p1_add_affine(blst_p1 *out, const blst_p1 *a,\n                                      const blst_p1_affine *b);\nvoid blst_p1_add_or_double_affine(blst_p1 *out, const blst_p1 *a,\n                                                const blst_p1_affine *b);\nvoid blst_p1_double(blst_p1 *out, const blst_p1 *a);\nvoid blst_p1_mult(blst_p1 *out, const blst_p1 *p, const byte *scalar,\n                                                  size_t nbits);\nvoid blst_p1_cneg(blst_p1 *p, bool cbit);\nvoid blst_p1_to_affine(blst_p1_affine *out, const blst_p1 *in);\nvoid blst_p1_from_affine(blst_p1 *out, const blst_p1_affine *in);\nbool blst_p1_on_curve(const blst_p1 *p);\nbool blst_p1_in_g1(const blst_p1 *p);\nbool blst_p1_is_equal(const blst_p1 *a, const blst_p1 *b);\nbool blst_p1_is_inf(const blst_p1 *a);\nconst blst_p1 *blst_p1_generator(void);\n\nbool 
blst_p1_affine_on_curve(const blst_p1_affine *p);\nbool blst_p1_affine_in_g1(const blst_p1_affine *p);\nbool blst_p1_affine_is_equal(const blst_p1_affine *a, const blst_p1_affine *b);\nbool blst_p1_affine_is_inf(const blst_p1_affine *a);\nconst blst_p1_affine *blst_p1_affine_generator(void);\n\ntypedef struct { blst_fp2 x, y, z; } blst_p2;\ntypedef struct { blst_fp2 x, y; } blst_p2_affine;\n\nvoid blst_p2_add(blst_p2 *out, const blst_p2 *a, const blst_p2 *b);\nvoid blst_p2_add_or_double(blst_p2 *out, const blst_p2 *a, const blst_p2 *b);\nvoid blst_p2_add_affine(blst_p2 *out, const blst_p2 *a,\n                                      const blst_p2_affine *b);\nvoid blst_p2_add_or_double_affine(blst_p2 *out, const blst_p2 *a,\n                                                const blst_p2_affine *b);\nvoid blst_p2_double(blst_p2 *out, const blst_p2 *a);\nvoid blst_p2_mult(blst_p2 *out, const blst_p2 *p, const byte *scalar,\n                                                  size_t nbits);\nvoid blst_p2_cneg(blst_p2 *p, bool cbit);\nvoid blst_p2_to_affine(blst_p2_affine *out, const blst_p2 *in);\nvoid blst_p2_from_affine(blst_p2 *out, const blst_p2_affine *in);\nbool blst_p2_on_curve(const blst_p2 *p);\nbool blst_p2_in_g2(const blst_p2 *p);\nbool blst_p2_is_equal(const blst_p2 *a, const blst_p2 *b);\nbool blst_p2_is_inf(const blst_p2 *a);\nconst blst_p2 *blst_p2_generator(void);\n\nbool blst_p2_affine_on_curve(const blst_p2_affine *p);\nbool blst_p2_affine_in_g2(const blst_p2_affine *p);\nbool blst_p2_affine_is_equal(const blst_p2_affine *a, const blst_p2_affine *b);\nbool blst_p2_affine_is_inf(const blst_p2_affine *a);\nconst blst_p2_affine *blst_p2_affine_generator(void);\n\n/*\n * Multi-scalar multiplications and other multi-point operations.\n */\n\nvoid blst_p1s_to_affine(blst_p1_affine dst[], const blst_p1 *const points[],\n                        size_t npoints);\nvoid blst_p1s_add(blst_p1 *ret, const blst_p1_affine *const points[],\n                                
size_t npoints);\n\nsize_t blst_p1s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints);\nvoid blst_p1s_mult_wbits_precompute(blst_p1_affine table[], size_t wbits,\n                                    const blst_p1_affine *const points[],\n                                    size_t npoints);\nsize_t blst_p1s_mult_wbits_scratch_sizeof(size_t npoints);\nvoid blst_p1s_mult_wbits(blst_p1 *ret, const blst_p1_affine table[],\n                         size_t wbits, size_t npoints,\n                         const byte *const scalars[], size_t nbits,\n                         limb_t *scratch);\n\nsize_t blst_p1s_mult_pippenger_scratch_sizeof(size_t npoints);\nvoid blst_p1s_mult_pippenger(blst_p1 *ret, const blst_p1_affine *const points[],\n                             size_t npoints, const byte *const scalars[],\n                             size_t nbits, limb_t *scratch);\nvoid blst_p1s_tile_pippenger(blst_p1 *ret, const blst_p1_affine *const points[],\n                             size_t npoints, const byte *const scalars[],\n                             size_t nbits, limb_t *scratch,\n                             size_t bit0, size_t window);\n\nvoid blst_p2s_to_affine(blst_p2_affine dst[], const blst_p2 *const points[],\n                        size_t npoints);\nvoid blst_p2s_add(blst_p2 *ret, const blst_p2_affine *const points[],\n                                size_t npoints);\n\nsize_t blst_p2s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints);\nvoid blst_p2s_mult_wbits_precompute(blst_p2_affine table[], size_t wbits,\n                                    const blst_p2_affine *const points[],\n                                    size_t npoints);\nsize_t blst_p2s_mult_wbits_scratch_sizeof(size_t npoints);\nvoid blst_p2s_mult_wbits(blst_p2 *ret, const blst_p2_affine table[],\n                         size_t wbits, size_t npoints,\n                         const byte *const scalars[], size_t nbits,\n                         limb_t *scratch);\n\nsize_t 
blst_p2s_mult_pippenger_scratch_sizeof(size_t npoints);\nvoid blst_p2s_mult_pippenger(blst_p2 *ret, const blst_p2_affine *const points[],\n                             size_t npoints, const byte *const scalars[],\n                             size_t nbits, limb_t *scratch);\nvoid blst_p2s_tile_pippenger(blst_p2 *ret, const blst_p2_affine *const points[],\n                             size_t npoints, const byte *const scalars[],\n                             size_t nbits, limb_t *scratch,\n                             size_t bit0, size_t window);\n\n/*\n * Hash-to-curve operations.\n */\n#ifndef SWIG\nvoid blst_map_to_g1(blst_p1 *out, const blst_fp *u, const blst_fp *v DEFNULL);\nvoid blst_map_to_g2(blst_p2 *out, const blst_fp2 *u, const blst_fp2 *v DEFNULL);\n#endif\n\nvoid blst_encode_to_g1(blst_p1 *out,\n                       const byte *msg, size_t msg_len,\n                       const byte *DST DEFNULL, size_t DST_len DEFNULL,\n                       const byte *aug DEFNULL, size_t aug_len DEFNULL);\nvoid blst_hash_to_g1(blst_p1 *out,\n                     const byte *msg, size_t msg_len,\n                     const byte *DST DEFNULL, size_t DST_len DEFNULL,\n                     const byte *aug DEFNULL, size_t aug_len DEFNULL);\n\nvoid blst_encode_to_g2(blst_p2 *out,\n                       const byte *msg, size_t msg_len,\n                       const byte *DST DEFNULL, size_t DST_len DEFNULL,\n                       const byte *aug DEFNULL, size_t aug_len DEFNULL);\nvoid blst_hash_to_g2(blst_p2 *out,\n                     const byte *msg, size_t msg_len,\n                     const byte *DST DEFNULL, size_t DST_len DEFNULL,\n                     const byte *aug DEFNULL, size_t aug_len DEFNULL);\n\n/*\n * Zcash-compatible serialization/deserialization.\n */\nvoid blst_p1_serialize(byte out[96], const blst_p1 *in);\nvoid blst_p1_compress(byte out[48], const blst_p1 *in);\nvoid blst_p1_affine_serialize(byte out[96], const blst_p1_affine *in);\nvoid 
blst_p1_affine_compress(byte out[48], const blst_p1_affine *in);\nBLST_ERROR blst_p1_uncompress(blst_p1_affine *out, const byte in[48]);\nBLST_ERROR blst_p1_deserialize(blst_p1_affine *out, const byte in[96]);\n\nvoid blst_p2_serialize(byte out[192], const blst_p2 *in);\nvoid blst_p2_compress(byte out[96], const blst_p2 *in);\nvoid blst_p2_affine_serialize(byte out[192], const blst_p2_affine *in);\nvoid blst_p2_affine_compress(byte out[96], const blst_p2_affine *in);\nBLST_ERROR blst_p2_uncompress(blst_p2_affine *out, const byte in[96]);\nBLST_ERROR blst_p2_deserialize(blst_p2_affine *out, const byte in[192]);\n\n/*\n * Specification defines two variants, 'minimal-signature-size' and\n * 'minimal-pubkey-size'. To unify appearance we choose to distinguish\n * them by suffix referring to the public key type, more specifically\n * _pk_in_g1 corresponds to 'minimal-pubkey-size' and _pk_in_g2 - to\n * 'minimal-signature-size'. It might appear a bit counterintuitive\n * in sign call, but no matter how you twist it, something is bound to\n * turn a little odd.\n */\n/*\n * Secret-key operations.\n */\nvoid blst_keygen(blst_scalar *out_SK, const byte *IKM, size_t IKM_len,\n                 const byte *info DEFNULL, size_t info_len DEFNULL);\nvoid blst_sk_to_pk_in_g1(blst_p1 *out_pk, const blst_scalar *SK);\nvoid blst_sign_pk_in_g1(blst_p2 *out_sig, const blst_p2 *hash,\n                                          const blst_scalar *SK);\nvoid blst_sk_to_pk_in_g2(blst_p2 *out_pk, const blst_scalar *SK);\nvoid blst_sign_pk_in_g2(blst_p1 *out_sig, const blst_p1 *hash,\n                                          const blst_scalar *SK);\n\n/*\n * Pairing interface.\n */\n#ifndef SWIG\nvoid blst_miller_loop(blst_fp12 *ret, const blst_p2_affine *Q,\n                                      const blst_p1_affine *P);\nvoid blst_miller_loop_n(blst_fp12 *ret, const blst_p2_affine *const Qs[],\n                                        const blst_p1_affine *const Ps[],\n                       
                 size_t n);\nvoid blst_final_exp(blst_fp12 *ret, const blst_fp12 *f);\nvoid blst_precompute_lines(blst_fp6 Qlines[68], const blst_p2_affine *Q);\nvoid blst_miller_loop_lines(blst_fp12 *ret, const blst_fp6 Qlines[68],\n                                            const blst_p1_affine *P);\nbool blst_fp12_finalverify(const blst_fp12 *gt1, const blst_fp12 *gt2);\n#endif\n\n#ifdef __BLST_CGO__\ntypedef limb_t blst_pairing;\n#elif defined(__BLST_RUST_BINDGEN__)\ntypedef struct {} blst_pairing;\n#else\ntypedef struct blst_opaque blst_pairing;\n#endif\n\nsize_t blst_pairing_sizeof(void);\nvoid blst_pairing_init(blst_pairing *new_ctx, bool hash_or_encode,\n                       const byte *DST DEFNULL, size_t DST_len DEFNULL);\nconst byte *blst_pairing_get_dst(const blst_pairing *ctx);\nvoid blst_pairing_commit(blst_pairing *ctx);\nBLST_ERROR blst_pairing_aggregate_pk_in_g2(blst_pairing *ctx,\n                                           const blst_p2_affine *PK,\n                                           const blst_p1_affine *signature,\n                                           const byte *msg, size_t msg_len,\n                                           const byte *aug DEFNULL,\n                                           size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_chk_n_aggr_pk_in_g2(blst_pairing *ctx,\n                                            const blst_p2_affine *PK,\n                                            bool pk_grpchk,\n                                            const blst_p1_affine *signature,\n                                            bool sig_grpchk,\n                                            const byte *msg, size_t msg_len,\n                                            const byte *aug DEFNULL,\n                                            size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g2(blst_pairing *ctx,\n                                                 const blst_p2_affine *PK,\n                        
                         const blst_p1_affine *sig,\n                                                 const byte *scalar,\n                                                 size_t nbits,\n                                                 const byte *msg,\n                                                 size_t msg_len,\n                                                 const byte *aug DEFNULL,\n                                                 size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g2(blst_pairing *ctx,\n                                                  const blst_p2_affine *PK,\n                                                  bool pk_grpchk,\n                                                  const blst_p1_affine *sig,\n                                                  bool sig_grpchk,\n                                                  const byte *scalar,\n                                                  size_t nbits,\n                                                  const byte *msg,\n                                                  size_t msg_len,\n                                                  const byte *aug DEFNULL,\n                                                  size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_aggregate_pk_in_g1(blst_pairing *ctx,\n                                           const blst_p1_affine *PK,\n                                           const blst_p2_affine *signature,\n                                           const byte *msg, size_t msg_len,\n                                           const byte *aug DEFNULL,\n                                           size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_chk_n_aggr_pk_in_g1(blst_pairing *ctx,\n                                            const blst_p1_affine *PK,\n                                            bool pk_grpchk,\n                                            const blst_p2_affine *signature,\n                                            
bool sig_grpchk,\n                                            const byte *msg, size_t msg_len,\n                                            const byte *aug DEFNULL,\n                                            size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g1(blst_pairing *ctx,\n                                                 const blst_p1_affine *PK,\n                                                 const blst_p2_affine *sig,\n                                                 const byte *scalar,\n                                                 size_t nbits,\n                                                 const byte *msg,\n                                                 size_t msg_len,\n                                                 const byte *aug DEFNULL,\n                                                 size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g1(blst_pairing *ctx,\n                                                  const blst_p1_affine *PK,\n                                                  bool pk_grpchk,\n                                                  const blst_p2_affine *sig,\n                                                  bool sig_grpchk,\n                                                  const byte *scalar,\n                                                  size_t nbits,\n                                                  const byte *msg,\n                                                  size_t msg_len,\n                                                  const byte *aug DEFNULL,\n                                                  size_t aug_len DEFNULL);\nBLST_ERROR blst_pairing_merge(blst_pairing *ctx, const blst_pairing *ctx1);\nbool blst_pairing_finalverify(const blst_pairing *ctx,\n                              const blst_fp12 *gtsig DEFNULL);\n\n\n/*\n * Customarily applications aggregate signatures separately.\n * In which case application would have to pass NULLs for |signature|\n * 
to blst_pairing_aggregate calls and pass aggregated signature\n * collected with these calls to blst_pairing_finalverify. Inputs are\n * Zcash-compatible \"straight-from-wire\" byte vectors, compressed or\n * not.\n */\nBLST_ERROR blst_aggregate_in_g1(blst_p1 *out, const blst_p1 *in,\n                                              const byte *zwire);\nBLST_ERROR blst_aggregate_in_g2(blst_p2 *out, const blst_p2 *in,\n                                              const byte *zwire);\n\nvoid blst_aggregated_in_g1(blst_fp12 *out, const blst_p1_affine *signature);\nvoid blst_aggregated_in_g2(blst_fp12 *out, const blst_p2_affine *signature);\n\n/*\n * \"One-shot\" CoreVerify entry points.\n */\nBLST_ERROR blst_core_verify_pk_in_g1(const blst_p1_affine *pk,\n                                     const blst_p2_affine *signature,\n                                     bool hash_or_encode,\n                                     const byte *msg, size_t msg_len,\n                                     const byte *DST DEFNULL,\n                                     size_t DST_len DEFNULL,\n                                     const byte *aug DEFNULL,\n                                     size_t aug_len DEFNULL);\nBLST_ERROR blst_core_verify_pk_in_g2(const blst_p2_affine *pk,\n                                     const blst_p1_affine *signature,\n                                     bool hash_or_encode,\n                                     const byte *msg, size_t msg_len,\n                                     const byte *DST DEFNULL,\n                                     size_t DST_len DEFNULL,\n                                     const byte *aug DEFNULL,\n                                     size_t aug_len DEFNULL);\n\nextern const blst_p1_affine BLS12_381_G1;\nextern const blst_p1_affine BLS12_381_NEG_G1;\nextern const blst_p2_affine BLS12_381_G2;\nextern const blst_p2_affine BLS12_381_NEG_G2;\n\n#include \"blst_aux.h\"\n\n#ifdef __cplusplus\n}\n#elif defined(__blst_h_bool__)\n# 
undef __blst_h_bool__\n# undef bool\n#endif\n#endif\n"
  },
  {
    "path": "bindings/blst.hpp",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLST_HPP__\n#define __BLST_HPP__\n\n#if !defined(SWIG) && __cplusplus < 201103L \\\n                   && (!defined(_MSVC_LANG) || _MSVC_LANG < 201103L)\n# error \"C++11 or later is required to compile <blst>/bindings/blst.hpp\"\n#endif\n\n#include <string>\n#include <cstring>\n#include <vector>\n#include <memory>\n\nnamespace blst {\n\n#ifdef __clang__\n# pragma GCC diagnostic push\n# pragma GCC diagnostic ignored \"-Wextern-c-compat\"\n#endif\n\n#include \"blst.h\"\n\n#ifdef __clang__\n# pragma GCC diagnostic pop\n#endif\n\nstruct bytes_t {\n    const byte* ptr;\n    size_t len;\n\n    bytes_t() = default;\n    bytes_t(const byte* p, size_t l) : ptr{p}, len{l} {}\n    template<template<typename, typename...> class C, typename T>\n    bytes_t(const C<T>& c)\n    {\n        static_assert(sizeof(T) == 1, \"unsupported type\");\n        ptr = reinterpret_cast<const byte*>(c.data());\n        len = c.size();\n    }\n    template<template<typename, size_t> class C, typename T, size_t N>\n    bytes_t(const C<T, N>& c)\n    {\n        static_assert(sizeof(T) == 1, \"unsupported type\");\n        ptr = reinterpret_cast<const byte*>(c.data());\n        len = c.size();\n    }\n};\n\nclass P1_Affine;\nclass P1;\nclass P2_Affine;\nclass P2;\nclass Pairing;\n\ninline const byte *C_bytes(const void *ptr)\n{   return static_cast<const byte*>(ptr);   }\n\n/*\n * As for SecretKey being struct and not class, and lack of constructors\n * with one accepting for example |IKM|. We can't make assumptions about\n * application's policy toward handling secret key material. Hence it's\n * argued that application is entitled for transparent structure, not\n * opaque or semi-opaque class. 
And in the context it's appropriate not\n * to \"entice\" developers with idiomatic constructors:-) Though this\n * doesn't really apply to SWIG-assisted interfaces...\n */\nstruct SecretKey {\n#ifdef SWIG\nprivate:\n#endif\n    blst_scalar key;\n\n#ifdef SWIG\npublic:\n#endif\n#ifndef SWIG\n    void keygen(const byte* IKM, size_t IKM_len,\n                const std::string& info = \"\")\n    {   blst_keygen(&key, IKM, IKM_len, C_bytes(info.data()), info.size());   }\n    void keygen_v3(const byte* IKM, size_t IKM_len,\n                   const std::string& info = \"\")\n    {   blst_keygen_v3(&key, IKM, IKM_len, C_bytes(info.data()), info.size());   }\n    void keygen_v4_5(const byte* IKM, size_t IKM_len,\n                     const byte* salt, size_t salt_len,\n                     const std::string& info = \"\")\n    {   blst_keygen_v4_5(&key, IKM, IKM_len, salt, salt_len,\n                               C_bytes(info.data()), info.size());\n    }\n    void keygen_v5(const byte* IKM, size_t IKM_len,\n                   const byte* salt, size_t salt_len,\n                   const std::string& info = \"\")\n    {   blst_keygen_v5(&key, IKM, IKM_len, salt, salt_len,\n                             C_bytes(info.data()), info.size());\n    }\n#endif\n    void keygen(bytes_t IKM, const std::string& info = \"\")\n    {   keygen(IKM.ptr, IKM.len, info);   }\n    void keygen_v3(bytes_t IKM, const std::string& info = \"\")\n    {   keygen_v3(IKM.ptr, IKM.len, info);   }\n    void keygen_v4_5(bytes_t IKM, bytes_t salt, const std::string& info = \"\")\n    {   keygen_v4_5(IKM.ptr, IKM.len, salt.ptr, salt.len, info);   }\n    void keygen_v5(bytes_t IKM, bytes_t salt, const std::string& info = \"\")\n    {   keygen_v5(IKM.ptr, IKM.len, salt.ptr, salt.len, info);   }\n    void derive_master_eip2333(const byte* IKM, size_t IKM_len)\n    {   blst_derive_master_eip2333(&key, IKM, IKM_len);   }\n    void derive_child_eip2333(const SecretKey& SK, unsigned int child_index)\n    {   
blst_derive_child_eip2333(&key, &SK.key, child_index);   }\n\n    void from_bendian(const byte in[32]) { blst_scalar_from_bendian(&key, in); }\n    void from_lendian(const byte in[32]) { blst_scalar_from_lendian(&key, in); }\n\n    void to_bendian(byte out[32]) const\n    {   blst_bendian_from_scalar(out, &key);   }\n    void to_lendian(byte out[32]) const\n    {   blst_lendian_from_scalar(out, &key);   }\n};\n\nclass Scalar {\nprivate:\n    blst_scalar val;\n\npublic:\n    Scalar() { memset(&val, 0, sizeof(val)); }\n    Scalar(const byte* scalar, size_t nbits)\n    {   blst_scalar_from_le_bytes(&val, scalar, (nbits+7)/8);   }\n#ifndef SWIG\n    Scalar(const byte *msg, size_t msg_len, const std::string& DST)\n    {   (void)hash_to(msg, msg_len, DST);   }\n\n    Scalar* hash_to(const byte *msg, size_t msg_len, const std::string& DST = \"\")\n    {   byte elem[48];\n        blst_expand_message_xmd(elem, sizeof(elem), msg, msg_len,\n                                                    C_bytes(DST.data()), DST.size());\n        blst_scalar_from_be_bytes(&val, elem, sizeof(elem));\n        return this;\n    }\n#endif\n    Scalar(bytes_t msg, const std::string& DST)\n    {   (void)hash_to(msg.ptr, msg.len, DST);   }\n    Scalar* hash_to(bytes_t msg, const std::string& DST = \"\")\n    {   return hash_to(msg.ptr, msg.len, DST);   }\n\n    Scalar dup() const { return *this; }\n    Scalar* from_bendian(const byte *msg, size_t msg_len)\n    {   blst_scalar_from_be_bytes(&val, msg, msg_len); return this;   }\n    Scalar* from_lendian(const byte *msg, size_t msg_len)\n    {   blst_scalar_from_le_bytes(&val, msg, msg_len); return this;   }\n    void to_bendian(byte out[32]) const\n    {   blst_bendian_from_scalar(out, &val);   }\n    void to_lendian(byte out[32]) const\n    {   blst_lendian_from_scalar(out, &val);   }\n\n    Scalar* add(const Scalar& a)\n    {   if (!blst_sk_add_n_check(&val, &val, a))\n            throw BLST_BAD_SCALAR;\n        return this;\n    }\n    Scalar* 
add(const SecretKey& a)\n    {   if (!blst_sk_add_n_check(&val, &val, &a.key))\n            throw BLST_BAD_SCALAR;\n        return this;\n    }\n    Scalar* sub(const Scalar& a)\n    {   if (!blst_sk_sub_n_check(&val, &val, a))\n            throw BLST_BAD_SCALAR;\n        return this;\n    }\n    Scalar* mul(const Scalar& a)\n    {   if (!blst_sk_mul_n_check(&val, &val, a))\n            throw BLST_BAD_SCALAR;\n        return this;\n    }\n    Scalar* inverse()\n    {   blst_sk_inverse(&val, &val); return this;   }\n\nprivate:\n    friend class P1;\n    friend class P2;\n    operator const blst_scalar*() const { return &val; }\n    operator const byte*() const        { return val.b; }\n};\n\nclass P1_Affine {\nprivate:\n    blst_p1_affine point;\n\n    P1_Affine(const blst_p1_affine *cptr) { point = *cptr; }\npublic:\n    P1_Affine() { memset(&point, 0, sizeof(point)); }\n#ifndef SWIG\n    P1_Affine(const byte *in)\n    {   BLST_ERROR err = blst_p1_deserialize(&point, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n    }\n#endif\n    P1_Affine(const byte *in, size_t len)\n    {   if (len == 0 || len != (in[0]&0x80 ? 
48 : 96))\n            throw BLST_BAD_ENCODING;\n        BLST_ERROR err = blst_p1_deserialize(&point, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n    }\n    P1_Affine(const P1& jacobian);\n\n    P1_Affine dup() const { return *this; }\n    P1 to_jacobian() const;\n    void serialize(byte out[96]) const\n    {   blst_p1_affine_serialize(out, &point);   }\n    void compress(byte out[48]) const\n    {   blst_p1_affine_compress(out, &point);   }\n    bool on_curve() const { return blst_p1_affine_on_curve(&point); }\n    bool in_group() const { return blst_p1_affine_in_g1(&point);    }\n    bool is_inf() const   { return blst_p1_affine_is_inf(&point);   }\n    bool is_equal(const P1_Affine& p) const\n    {   return blst_p1_affine_is_equal(&point, &p.point);   }\n#ifndef SWIG\n    BLST_ERROR core_verify(const P2_Affine& pk, bool hash_or_encode,\n                           const byte* msg, size_t msg_len,\n                           const std::string& DST = \"\",\n                           const byte* aug = nullptr, size_t aug_len = 0) const;\n#endif\n    BLST_ERROR core_verify(const P2_Affine& pk, bool hash_or_encode,\n                           bytes_t msg, const std::string& DST = \"\",\n                           bytes_t aug = {nullptr, 0}) const\n    {   return core_verify(pk, hash_or_encode, msg.ptr, msg.len, DST,\n                                               aug.ptr, aug.len);\n    }\n    static P1_Affine generator()\n    {   return P1_Affine(blst_p1_affine_generator());   }\n\nprivate:\n    friend class Pairing;\n    friend class P2_Affine;\n    friend class PT;\n    friend class P1;\n    friend class P1_Affines;\n    operator const blst_p1_affine*() const { return &point; }\n    operator blst_p1_affine*()             { return &point; }\n};\n\nclass P1 {\nprivate:\n    blst_p1 point;\n\n    P1(const blst_p1 *cptr) { point = *cptr; }\npublic:\n    P1() { memset(&point, 0, sizeof(point)); }\n    P1(const SecretKey& sk) { 
blst_sk_to_pk_in_g1(&point, &sk.key); }\n#ifndef SWIG\n    P1(const byte *in)\n    {   blst_p1_affine a;\n        BLST_ERROR err = blst_p1_deserialize(&a, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n        blst_p1_from_affine(&point, &a);\n    }\n#endif\n    P1(const byte *in, size_t len)\n    {   if (len == 0 || len != (in[0]&0x80 ? 48 : 96))\n            throw BLST_BAD_ENCODING;\n        blst_p1_affine a;\n        BLST_ERROR err = blst_p1_deserialize(&a, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n        blst_p1_from_affine(&point, &a);\n    }\n    P1(const P1_Affine& affine) { blst_p1_from_affine(&point, affine); }\n\n    P1 dup() const                      { return *this; }\n    P1_Affine to_affine() const         { return P1_Affine(*this);           }\n    void serialize(byte out[96]) const  { blst_p1_serialize(out, &point);    }\n    void compress(byte out[48]) const   { blst_p1_compress(out, &point);     }\n    bool on_curve() const               { return blst_p1_on_curve(&point);   }\n    bool in_group() const               { return blst_p1_in_g1(&point);      }\n    bool is_inf() const                 { return blst_p1_is_inf(&point);     }\n    bool is_equal(const P1& p) const\n    {   return blst_p1_is_equal(&point, &p.point);   }\n    void aggregate(const P1_Affine& in)\n    {   if (blst_p1_affine_in_g1(in))\n            blst_p1_add_or_double_affine(&point, &point, in);\n        else\n            throw BLST_POINT_NOT_IN_GROUP;\n    }\n    P1* sign_with(const SecretKey& sk)\n    {   blst_sign_pk_in_g2(&point, &point, &sk.key); return this;   }\n    P1* sign_with(const Scalar& scalar)\n    {   blst_sign_pk_in_g2(&point, &point, scalar); return this;   }\n    P1* hash_to(bytes_t msg, const std::string& DST = \"\",\n                bytes_t aug = {nullptr, 0})\n    {   blst_hash_to_g1(&point, msg.ptr, msg.len, C_bytes(DST.data()), DST.size(),\n                                aug.ptr, aug.len);\n        return this;\n  
  }\n    P1* encode_to(bytes_t msg, const std::string& DST = \"\",\n                  bytes_t aug = {nullptr, 0})\n    {   blst_encode_to_g1(&point, msg.ptr, msg.len, C_bytes(DST.data()), DST.size(),\n                                  aug.ptr, aug.len);\n        return this;\n    }\n#ifndef SWIG\n    P1* hash_to(const byte* msg, size_t msg_len,\n                const std::string& DST = \"\",\n                const byte* aug = nullptr, size_t aug_len = 0)\n    {   blst_hash_to_g1(&point, msg, msg_len, C_bytes(DST.data()), DST.size(),\n                                aug, aug_len);\n        return this;\n    }\n    P1* encode_to(const byte* msg, size_t msg_len,\n                  const std::string& DST = \"\",\n                  const byte* aug = nullptr, size_t aug_len = 0)\n    {   blst_encode_to_g1(&point, msg, msg_len, C_bytes(DST.data()), DST.size(),\n                                  aug, aug_len);\n        return this;\n    }\n#endif\n    P1* mult(const byte* scalar, size_t nbits)\n    {   blst_p1_mult(&point, &point, scalar, nbits); return this;   }\n    P1* mult(const Scalar& scalar)\n    {   blst_p1_mult(&point, &point, scalar, 255); return this;   }\n    P1* cneg(bool flag)\n    {   blst_p1_cneg(&point, flag); return this;   }\n    P1* neg()\n    {   blst_p1_cneg(&point, true); return this;   }\n    P1* add(const P1& a)\n    {   blst_p1_add_or_double(&point, &point, a); return this;   }\n    P1* add(const P1_Affine &a)\n    {   blst_p1_add_or_double_affine(&point, &point, a); return this;   }\n    P1* dbl()\n    {   blst_p1_double(&point, &point); return this;   }\n#ifndef SWIG\n    static P1 add(const P1& a, const P1& b)\n    {   P1 ret; blst_p1_add_or_double(ret, a, b); return ret;   }\n    static P1 add(const P1& a, const P1_Affine& b)\n    {   P1 ret; blst_p1_add_or_double_affine(ret, a, b); return ret;   }\n    static P1 dbl(const P1& a)\n    {   P1 ret; blst_p1_double(ret, a); return ret;   }\n#endif\n    static P1 generator()\n    {   return 
P1(blst_p1_generator());   }\n\nprivate:\n    friend class P1_Affine;\n    friend class P1_Affines;\n    operator const blst_p1*() const { return &point; }\n    operator blst_p1*()             { return &point; }\n};\n\n/*\n * P1 points prepared for multi-scalar multiplication. When |wbits| is\n * non-zero, |table| holds the window table filled by\n * blst_p1s_mult_wbits_precompute and mult() calls blst_p1s_mult_wbits;\n * otherwise |table| holds the bare affine points and mult() calls\n * blst_p1s_mult_pippenger.\n */\nclass P1_Affines {\nprivate:\n    struct p1_affine_no_init {\n        blst_p1_affine point;\n        p1_affine_no_init() { }\n        operator blst_p1_affine*()              { return &point; }\n        operator const blst_p1_affine*() const  { return &point; }\n    };\n\n    std::vector<p1_affine_no_init> table;\n    size_t wbits, npoints;\n\npublic:\n#ifndef SWIG\n    /* Zero the counters so that they are never read uninitialized. */\n    P1_Affines() : wbits(0), npoints(0) {}\n    P1_Affines(size_t wbits, const P1_Affine* const points[], size_t npoints)\n    {   this->wbits = wbits;\n        this->npoints = npoints;\n        table.resize(npoints << (wbits-1));\n        blst_p1s_mult_wbits_precompute(table.at(0), wbits,\n                        reinterpret_cast<const blst_p1_affine *const*>(points),\n                        npoints);\n    }\n    /*\n     * The array and std::vector overloads below forward to the\n     * pointer-array constructors. The result has to be assigned to\n     * |*this|: a bare |P1_Affines(...);| statement in a constructor body\n     * merely builds and discards a temporary, leaving this object\n     * unpopulated.\n     */\n    P1_Affines(size_t wbits, const P1_Affine points[], size_t npoints)\n    {   const P1_Affine* const ptrs[2] = { points, nullptr };\n        *this = P1_Affines(wbits, ptrs, npoints);\n    }\n    P1_Affines(size_t wbits, const std::vector<P1_Affine>& points)\n    {   *this = P1_Affines(wbits, &points.at(0), points.size());   }\n\n    P1_Affines(size_t wbits, const P1* const points[], size_t npoints)\n    {   size_t cap = npoints << (wbits-1);\n\n        this->wbits = wbits;\n        this->npoints = npoints;\n        table.resize(cap);\n        blst_p1s_to_affine(table.at(cap-npoints),\n                           reinterpret_cast<const blst_p1 *const*>(points),\n                           npoints);\n        const blst_p1_affine* const ptrs[2] = { table[cap-npoints], nullptr };\n        blst_p1s_mult_wbits_precompute(table[0], wbits, ptrs, npoints);\n    }\n    P1_Affines(size_t wbits, const P1 points[], size_t npoints)\n    {   const P1* const ptrs[2] = { points, nullptr };\n        *this = P1_Affines(wbits, ptrs, npoints);\n    
}\n    P1_Affines(size_t wbits, const std::vector<P1>& points)\n    {   *this = P1_Affines(wbits, &points.at(0), points.size());   }\n\n    P1_Affines(const P1* const points[], size_t npoints)\n    {   this->wbits = 0;\n        this->npoints = npoints;\n        table.resize(npoints);\n        blst_p1s_to_affine(table.at(0),\n                           reinterpret_cast<const blst_p1 *const*>(points),\n                           npoints);\n    }\n    P1_Affines(const P1 points[], size_t npoints)\n    {   const P1* const ptrs[2] = { points, nullptr };\n        *this = P1_Affines(ptrs, npoints);\n    }\n    P1_Affines(const std::vector<P1>& points)\n    {   *this = P1_Affines(&points.at(0), points.size());   }\n\n    P1 mult(const byte* const scalars[], size_t nbits) const\n    {   P1 ret;\n\n        if (wbits != 0) {\n            size_t sz = blst_p1s_mult_wbits_scratch_sizeof(npoints);\n            std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n            blst_p1s_mult_wbits(ret, table.at(0), wbits, npoints,\n                                     scalars, nbits, scratch.get());\n        } else {\n            size_t sz = blst_p1s_mult_pippenger_scratch_sizeof(npoints);\n            std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n            const blst_p1_affine* const ptrs[2] = { table.at(0), nullptr };\n            blst_p1s_mult_pippenger(ret, ptrs, npoints,\n                                         scalars, nbits, scratch.get());\n        }\n        return ret;\n    }\n\n    static std::vector<P1_Affine> from(const P1* const points[], size_t npoints)\n    {   std::vector<P1_Affine> ret;\n        ret.resize(npoints);\n        blst_p1s_to_affine(reinterpret_cast<blst_p1_affine*>(&ret.at(0)),\n                           reinterpret_cast<const blst_p1 *const*>(points),\n                           npoints);\n        return ret;\n    }\n    static std::vector<P1_Affine> from(const P1 points[], size_t npoints)\n    {   const P1* const ptrs[2] = { points, 
nullptr };\n        return from(ptrs, npoints);\n    }\n    static std::vector<P1_Affine> from(const std::vector<P1>& points)\n    {   return from(&points.at(0), points.size());   }\n#endif\n\n    static P1 mult_pippenger(const P1_Affine* const points[], size_t npoints,\n                             const byte* const scalars[], size_t nbits)\n    {   P1 ret;\n        size_t sz = blst_p1s_mult_pippenger_scratch_sizeof(npoints);\n        std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n        blst_p1s_mult_pippenger(ret,\n                    reinterpret_cast<const blst_p1_affine *const*>(points),\n                    npoints, scalars, nbits, scratch.get());\n        return ret;\n    }\n#ifndef SWIG\n    static P1 mult_pippenger(const P1_Affine points[], size_t npoints,\n                             const byte* const scalars[], size_t nbits)\n    {   const P1_Affine* const ptrs[2] = { points, nullptr };\n        return mult_pippenger(ptrs, npoints, scalars, nbits);\n    }\n    static P1 mult_pippenger(const std::vector<P1_Affine>& points,\n                             const byte* const scalars[], size_t nbits)\n    {   return mult_pippenger(&points.at(0), points.size(), scalars, nbits);   }\n#endif\n\n    static P1 add(const P1_Affine* const points[], size_t npoints)\n    {   P1 ret;\n        blst_p1s_add(ret,\n                     reinterpret_cast<const blst_p1_affine *const*>(points),\n                     npoints);\n        return ret;\n    }\n#ifndef SWIG\n    static P1 add(const P1_Affine points[], size_t npoints)\n    {   const P1_Affine* const ptrs[2] = { points, nullptr };\n        return add(ptrs, npoints);\n    }\n    static P1 add(const std::vector<P1_Affine>& points)\n    {   return add(&points.at(0), points.size());   }\n#endif\n};\n\nclass P2_Affine {\nprivate:\n    blst_p2_affine point;\n\n    P2_Affine(const blst_p2_affine *cptr) { point = *cptr; }\npublic:\n    P2_Affine() { memset(&point, 0, sizeof(point)); }\n#ifndef SWIG\n    
P2_Affine(const byte *in)\n    {   BLST_ERROR err = blst_p2_deserialize(&point, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n    }\n#endif\n    P2_Affine(const byte *in, size_t len)\n    {   if (len == 0 || len != (in[0]&0x80 ? 96 : 192))\n            throw BLST_BAD_ENCODING;\n        BLST_ERROR err = blst_p2_deserialize(&point, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n    }\n    P2_Affine(const P2& jacobian);\n\n    P2_Affine dup() const { return *this; }\n    P2 to_jacobian() const;\n    void serialize(byte out[192]) const\n    {   blst_p2_affine_serialize(out, &point);   }\n    void compress(byte out[96]) const\n    {   blst_p2_affine_compress(out, &point);   }\n    bool on_curve() const { return blst_p2_affine_on_curve(&point); }\n    bool in_group() const { return blst_p2_affine_in_g2(&point);    }\n    bool is_inf() const   { return blst_p2_affine_is_inf(&point);   }\n    bool is_equal(const P2_Affine& p) const\n    {   return blst_p2_affine_is_equal(&point, &p.point);   }\n#ifndef SWIG\n    BLST_ERROR core_verify(const P1_Affine& pk, bool hash_or_encode,\n                           const byte* msg, size_t msg_len,\n                           const std::string& DST = \"\",\n                           const byte* aug = nullptr, size_t aug_len = 0) const;\n#endif\n    BLST_ERROR core_verify(const P1_Affine& pk, bool hash_or_encode,\n                           bytes_t msg, const std::string& DST = \"\",\n                           bytes_t aug = {nullptr, 0}) const\n    {   return core_verify(pk, hash_or_encode, msg.ptr, msg.len, DST,\n                                               aug.ptr, aug.len);\n    }\n    static P2_Affine generator()\n    {   return P2_Affine(blst_p2_affine_generator());   }\n\nprivate:\n    friend class Pairing;\n    friend class P1_Affine;\n    friend class PT;\n    friend class P2;\n    friend class P2_Affines;\n    operator const blst_p2_affine*() const { return &point; }\n    operator 
blst_p2_affine*()             { return &point; }\n};\n\nclass P2 {\nprivate:\n    blst_p2 point;\n\n    P2(const blst_p2 *cptr) { point = *cptr; }\npublic:\n    P2() { memset(&point, 0, sizeof(point)); }\n    P2(const SecretKey& sk) { blst_sk_to_pk_in_g2(&point, &sk.key); }\n#ifndef SWIG\n    P2(const byte *in)\n    {   blst_p2_affine a;\n        BLST_ERROR err = blst_p2_deserialize(&a, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n        blst_p2_from_affine(&point, &a);\n    }\n#endif\n    P2(const byte *in, size_t len)\n    {   if (len == 0 || len != (in[0]&0x80 ? 96 : 192))\n            throw BLST_BAD_ENCODING;\n        blst_p2_affine a;\n        BLST_ERROR err = blst_p2_deserialize(&a, in);\n        if (err != BLST_SUCCESS)\n            throw err;\n        blst_p2_from_affine(&point, &a);\n    }\n    P2(const P2_Affine& affine) { blst_p2_from_affine(&point, affine); }\n\n    P2 dup() const                      { return *this; }\n    P2_Affine to_affine() const         { return P2_Affine(*this);          }\n    void serialize(byte out[192]) const { blst_p2_serialize(out, &point);   }\n    void compress(byte out[96]) const   { blst_p2_compress(out, &point);    }\n    bool on_curve() const               { return blst_p2_on_curve(&point);  }\n    bool in_group() const               { return blst_p2_in_g2(&point);     }\n    bool is_inf() const                 { return blst_p2_is_inf(&point);    }\n    bool is_equal(const P2& p) const\n    {   return blst_p2_is_equal(&point, &p.point);   }\n    void aggregate(const P2_Affine& in)\n    {   if (blst_p2_affine_in_g2(in))\n            blst_p2_add_or_double_affine(&point, &point, in);\n        else\n            throw BLST_POINT_NOT_IN_GROUP;\n    }\n    P2* sign_with(const SecretKey& sk)\n    {   blst_sign_pk_in_g1(&point, &point, &sk.key); return this;   }\n    P2* sign_with(const Scalar& scalar)\n    {   blst_sign_pk_in_g1(&point, &point, scalar); return this;   }\n    P2* hash_to(bytes_t msg, const 
std::string& DST = \"\",\n                bytes_t aug = {nullptr, 0})\n    {   blst_hash_to_g2(&point, msg.ptr, msg.len, C_bytes(DST.data()), DST.size(),\n                                aug.ptr, aug.len);\n        return this;\n    }\n    P2* encode_to(bytes_t msg, const std::string& DST = \"\",\n                  bytes_t aug = {nullptr, 0})\n    {   blst_encode_to_g2(&point, msg.ptr, msg.len, C_bytes(DST.data()), DST.size(),\n                                  aug.ptr, aug.len);\n        return this;\n    }\n#ifndef SWIG\n    P2* hash_to(const byte* msg, size_t msg_len,\n                const std::string& DST = \"\",\n                const byte* aug = nullptr, size_t aug_len = 0)\n    {   blst_hash_to_g2(&point, msg, msg_len, C_bytes(DST.data()), DST.size(),\n                                aug, aug_len);\n        return this;\n    }\n    P2* encode_to(const byte* msg, size_t msg_len,\n                  const std::string& DST = \"\",\n                  const byte* aug = nullptr, size_t aug_len = 0)\n    {   blst_encode_to_g2(&point, msg, msg_len, C_bytes(DST.data()), DST.size(),\n                                  aug, aug_len);\n        return this;\n    }\n#endif\n\n    P2* mult(const byte* scalar, size_t nbits)\n    {   blst_p2_mult(&point, &point, scalar, nbits); return this;   }\n    P2* mult(const Scalar& scalar)\n    {   blst_p2_mult(&point, &point, scalar, 255); return this;   }\n    P2* cneg(bool flag)\n    {   blst_p2_cneg(&point, flag); return this;   }\n    P2* neg()\n    {   blst_p2_cneg(&point, true); return this;   }\n    P2* add(const P2& a)\n    {   blst_p2_add_or_double(&point, &point, a); return this;   }\n    P2* add(const P2_Affine &a)\n    {   blst_p2_add_or_double_affine(&point, &point, a); return this;   }\n    P2* dbl()\n    {   blst_p2_double(&point, &point); return this;   }\n#ifndef SWIG\n    static P2 add(const P2& a, const P2& b)\n    {   P2 ret; blst_p2_add_or_double(ret, a, b); return ret;   }\n    static P2 add(const P2& a, const 
P2_Affine& b)\n    {   P2 ret; blst_p2_add_or_double_affine(ret, a, b); return ret;   }\n    static P2 dbl(const P2& a)\n    {   P2 ret; blst_p2_double(ret, a); return ret;   }\n#endif\n    static P2 generator()\n    {   return P2(blst_p2_generator());   }\n\nprivate:\n    friend class P2_Affine;\n    friend class P2_Affines;\n    operator const blst_p2*() const { return &point; }\n    operator blst_p2*()             { return &point; }\n};\n\n/*\n * P2 points prepared for multi-scalar multiplication. When |wbits| is\n * non-zero, |table| holds the window table filled by\n * blst_p2s_mult_wbits_precompute and mult() calls blst_p2s_mult_wbits;\n * otherwise |table| holds the bare affine points and mult() calls\n * blst_p2s_mult_pippenger.\n */\nclass P2_Affines {\nprivate:\n    struct p2_affine_no_init {\n        blst_p2_affine point;\n        p2_affine_no_init() { }\n        operator blst_p2_affine*()              { return &point; }\n        operator const blst_p2_affine*() const  { return &point; }\n    };\n\n    std::vector<p2_affine_no_init> table;\n    size_t wbits, npoints;\n\npublic:\n#ifndef SWIG\n    /* Zero the counters so that they are never read uninitialized. */\n    P2_Affines() : wbits(0), npoints(0) {}\n    P2_Affines(size_t wbits, const P2_Affine* const points[], size_t npoints)\n    {   this->wbits = wbits;\n        this->npoints = npoints;\n        table.resize(npoints << (wbits-1));\n        blst_p2s_mult_wbits_precompute(table.at(0), wbits,\n                        reinterpret_cast<const blst_p2_affine *const*>(points),\n                        npoints);\n    }\n    /*\n     * The array and std::vector overloads below forward to the\n     * pointer-array constructors. The result has to be assigned to\n     * |*this|: a bare |P2_Affines(...);| statement in a constructor body\n     * merely builds and discards a temporary, leaving this object\n     * unpopulated.\n     */\n    P2_Affines(size_t wbits, const P2_Affine points[], size_t npoints)\n    {   const P2_Affine* const ptrs[2] = { points, nullptr };\n        *this = P2_Affines(wbits, ptrs, npoints);\n    }\n    P2_Affines(size_t wbits, const std::vector<P2_Affine>& points)\n    {   *this = P2_Affines(wbits, &points.at(0), points.size());   }\n\n    P2_Affines(size_t wbits, const P2* const points[], size_t npoints)\n    {   size_t cap = npoints << (wbits-1);\n\n        this->wbits = wbits;\n        this->npoints = npoints;\n        table.resize(cap);\n        blst_p2s_to_affine(table.at(cap-npoints),\n                           reinterpret_cast<const blst_p2 *const*>(points),\n                           npoints);\n        const blst_p2_affine* const ptrs[2] = { table[cap-npoints], nullptr };\n        
blst_p2s_mult_wbits_precompute(table[0], wbits, ptrs, npoints);\n    }\n    P2_Affines(size_t wbits, const P2 points[], size_t npoints)\n    {   const P2* const ptrs[2] = { points, nullptr };\n        *this = P2_Affines(wbits, ptrs, npoints);\n    }\n    P2_Affines(size_t wbits, const std::vector<P2>& points)\n    {   *this = P2_Affines(wbits, &points.at(0), points.size());   }\n\n    P2_Affines(const P2* const points[], size_t npoints)\n    {   this->wbits = 0;\n        this->npoints = npoints;\n        table.resize(npoints);\n        blst_p2s_to_affine(table.at(0),\n                           reinterpret_cast<const blst_p2 *const*>(points),\n                           npoints);\n    }\n    P2_Affines(const P2 points[], size_t npoints)\n    {   const P2* const ptrs[2] = { points, nullptr };\n        *this = P2_Affines(ptrs, npoints);\n    }\n    P2_Affines(const std::vector<P2>& points)\n    {   *this = P2_Affines(&points.at(0), points.size());   }\n\n    P2 mult(const byte* const scalars[], size_t nbits) const\n    {   P2 ret;\n\n        if (wbits != 0) {\n            size_t sz = blst_p2s_mult_wbits_scratch_sizeof(npoints);\n            std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n            blst_p2s_mult_wbits(ret, table.at(0), wbits, npoints,\n                                     scalars, nbits, scratch.get());\n        } else {\n            size_t sz = blst_p2s_mult_pippenger_scratch_sizeof(npoints);\n            std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n            const blst_p2_affine* const ptrs[2] = { table.at(0), nullptr };\n            blst_p2s_mult_pippenger(ret, ptrs, npoints,\n                                         scalars, nbits, scratch.get());\n        }\n        return ret;\n    }\n\n    static std::vector<P2_Affine> from(const P2* const points[], size_t npoints)\n    {   std::vector<P2_Affine> ret;\n        ret.resize(npoints);\n        blst_p2s_to_affine(reinterpret_cast<blst_p2_affine*>(&ret.at(0)),\n                           
reinterpret_cast<const blst_p2 *const*>(points),\n                           npoints);\n        return ret;\n    }\n    static std::vector<P2_Affine> from(const P2 points[], size_t npoints)\n    {   const P2* const ptrs[2] = { points, nullptr };\n        return from(ptrs, npoints);\n    }\n    static std::vector<P2_Affine> from(const std::vector<P2>& points)\n    {   return from(&points.at(0), points.size());   }\n#endif\n\n    static P2 mult_pippenger(const P2_Affine* const points[], size_t npoints,\n                             const byte* const scalars[], size_t nbits)\n    {   P2 ret;\n        size_t sz = blst_p2s_mult_pippenger_scratch_sizeof(npoints);\n        std::unique_ptr<limb_t[]> scratch{new limb_t[sz/sizeof(limb_t)]};\n        blst_p2s_mult_pippenger(ret,\n                    reinterpret_cast<const blst_p2_affine *const*>(points),\n                    npoints, scalars, nbits, scratch.get());\n        return ret;\n    }\n#ifndef SWIG\n    static P2 mult_pippenger(const P2_Affine points[], size_t npoints,\n                             const byte* const scalars[], size_t nbits)\n    {   const P2_Affine* const ptrs[2] = { points, nullptr };\n        return mult_pippenger(ptrs, npoints, scalars, nbits);\n    }\n    static P2 mult_pippenger(const std::vector<P2_Affine>& points,\n                             const byte* const scalars[], size_t nbits)\n    {   return mult_pippenger(&points.at(0), points.size(), scalars, nbits);   }\n#endif\n\n    static P2 add(const P2_Affine* const points[], size_t npoints)\n    {   P2 ret;\n        blst_p2s_add(ret,\n                     reinterpret_cast<const blst_p2_affine *const*>(points),\n                     npoints);\n        return ret;\n    }\n#ifndef SWIG\n    static P2 add(const P2_Affine points[], size_t npoints)\n    {   const P2_Affine* const ptrs[2] = { points, nullptr };\n        return add(ptrs, npoints);\n    }\n    static P2 add(const std::vector<P2_Affine>& points)\n    {   return add(&points.at(0), 
points.size());   }\n#endif\n};\n\ninline P1_Affine::P1_Affine(const P1& jacobian)\n{   blst_p1_to_affine(&point, jacobian);   }\ninline P2_Affine::P2_Affine(const P2& jacobian)\n{   blst_p2_to_affine(&point, jacobian);   }\n\ninline P1 P1_Affine::to_jacobian() const { P1 ret(*this); return ret; }\ninline P2 P2_Affine::to_jacobian() const { P2 ret(*this); return ret; }\n\ninline P1 G1() { return P1::generator();  }\ninline P2 G2() { return P2::generator();  }\n\n#ifndef SWIG\ninline BLST_ERROR P1_Affine::core_verify(const P2_Affine& pk,\n                                         bool hash_or_encode,\n                                         const byte* msg, size_t msg_len,\n                                         const std::string& DST,\n                                         const byte* aug, size_t aug_len) const\n{   return blst_core_verify_pk_in_g2(pk, &point, hash_or_encode,\n                                         msg, msg_len,\n                                         C_bytes(DST.data()), DST.size(),\n                                         aug, aug_len);\n}\ninline BLST_ERROR P2_Affine::core_verify(const P1_Affine& pk,\n                                         bool hash_or_encode,\n                                         const byte* msg, size_t msg_len,\n                                         const std::string& DST,\n                                         const byte* aug, size_t aug_len) const\n{   return blst_core_verify_pk_in_g1(pk, &point, hash_or_encode,\n                                         msg, msg_len,\n                                         C_bytes(DST.data()), DST.size(),\n                                         aug, aug_len);\n}\n#endif\n\nclass PT {\nprivate:\n    blst_fp12 value;\n\n    PT(const blst_fp12 *v)  { value = *v; }\npublic:\n    PT(const P1_Affine& p)  { blst_aggregated_in_g1(&value, p); }\n    PT(const P2_Affine& q)  { blst_aggregated_in_g2(&value, q); }\n    PT(const P2_Affine& q, const P1_Affine& p)\n    {   
blst_miller_loop(&value, q, p);   }\n    PT(const P1_Affine& p, const P2_Affine& q) : PT(q, p) {}\n    PT(const P2& q, const P1& p)\n    {   blst_miller_loop(&value, P2_Affine(q), P1_Affine(p));   }\n    PT(const P1& p, const P2& q) : PT(q, p) {}\n\n    PT dup() const          { return *this; }\n    bool is_one() const     { return blst_fp12_is_one(&value); }\n    bool is_equal(const PT& p) const\n    {   return blst_fp12_is_equal(&value, p);   }\n    PT* sqr()               { blst_fp12_sqr(&value, &value);    return this; }\n    PT* mul(const PT& p)    { blst_fp12_mul(&value, &value, p); return this; }\n    PT* final_exp()         { blst_final_exp(&value, &value);   return this; }\n    bool in_group() const   { return blst_fp12_in_group(&value); }\n    void to_bendian(byte out[48*12]) const\n    {   blst_bendian_from_fp12(out, &value);   }\n\n    static bool finalverify(const PT& gt1, const PT& gt2)\n    {   return blst_fp12_finalverify(gt1, gt2);   }\n    static PT one() { return PT(blst_fp12_one()); }\n\nprivate:\n    friend class Pairing;\n    operator const blst_fp12*() const { return &value; }\n};\n\nclass Pairing {\nprivate:\n    operator blst_pairing*()\n    {   return reinterpret_cast<blst_pairing *>(this);   }\n    operator const blst_pairing*() const\n    {   return reinterpret_cast<const blst_pairing *>(this);   }\n\n    void init(bool hash_or_encode, const byte* DST, size_t DST_len)\n    {   // Copy DST to heap, std::string can be volatile, especially in SWIG:-(\n        byte *dst = new byte[DST_len];\n        memcpy(dst, DST, DST_len);\n        blst_pairing_init(*this, hash_or_encode, dst, DST_len);\n    }\n\npublic:\n#ifndef SWIG\n    void* operator new(size_t)\n    {   return new uint64_t[blst_pairing_sizeof()/sizeof(uint64_t)];   }\n    void operator delete(void *ptr)\n    {   delete[] static_cast<uint64_t*>(ptr);   }\n\n    Pairing(bool hash_or_encode, const std::string& DST)\n    {   init(hash_or_encode, C_bytes(DST.data()), DST.size());   
}\n#endif\n#ifndef SWIGJAVA\n    Pairing(bool hash_or_encode, const byte* DST, size_t DST_len)\n    {   init(hash_or_encode, DST, DST_len);   }\n    ~Pairing() { delete[] blst_pairing_get_dst(*this); }\n#endif\n\n    BLST_ERROR aggregate(const P1_Affine* pk, const P2_Affine* sig,\n                         bytes_t msg, bytes_t aug = {nullptr, 0})\n    {   return blst_pairing_aggregate_pk_in_g1(*this, *pk, *sig,\n                         msg.ptr, msg.len, aug.ptr, aug.len);\n    }\n    BLST_ERROR aggregate(const P2_Affine* pk, const P1_Affine* sig,\n                         bytes_t msg, bytes_t aug = {nullptr, 0})\n    {   return blst_pairing_aggregate_pk_in_g2(*this, *pk, *sig,\n                         msg.ptr, msg.len, aug.ptr, aug.len);\n    }\n    BLST_ERROR mul_n_aggregate(const P1_Affine* pk, const P2_Affine* sig,\n                               const byte* scalar, size_t nbits,\n                               bytes_t msg, bytes_t aug = {nullptr, 0})\n    {   return blst_pairing_mul_n_aggregate_pk_in_g1(*this, *pk, *sig,\n                               scalar, nbits, msg.ptr, msg.len, aug.ptr, aug.len);\n    }\n    BLST_ERROR mul_n_aggregate(const P2_Affine* pk, const P1_Affine* sig,\n                               const byte* scalar, size_t nbits,\n                               bytes_t msg, bytes_t aug = {nullptr, 0})\n    {   return blst_pairing_mul_n_aggregate_pk_in_g2(*this, *pk, *sig,\n                               scalar, nbits, msg.ptr, msg.len, aug.ptr, aug.len);\n    }\n#ifndef SWIG\n    BLST_ERROR aggregate(const P1_Affine* pk, const P2_Affine* sig,\n                         const byte* msg, size_t msg_len,\n                         const byte* aug = nullptr, size_t aug_len = 0)\n    {   return blst_pairing_aggregate_pk_in_g1(*this, *pk, *sig,\n                         msg, msg_len, aug, aug_len);\n    }\n    BLST_ERROR aggregate(const P2_Affine* pk, const P1_Affine* sig,\n                         const byte* msg, size_t msg_len,\n                  
       const byte* aug = nullptr, size_t aug_len = 0)\n    {   return blst_pairing_aggregate_pk_in_g2(*this, *pk, *sig,\n                         msg, msg_len, aug, aug_len);\n    }\n    BLST_ERROR mul_n_aggregate(const P1_Affine* pk, const P2_Affine* sig,\n                               const byte* scalar, size_t nbits,\n                               const byte* msg, size_t msg_len,\n                               const byte* aug = nullptr, size_t aug_len = 0)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g1(*this, *pk, *sig,\n                               scalar, nbits, msg, msg_len, aug, aug_len);\n    }\n    BLST_ERROR mul_n_aggregate(const P2_Affine* pk, const P1_Affine* sig,\n                               const byte* scalar, size_t nbits,\n                               const byte* msg, size_t msg_len,\n                               const byte* aug = nullptr, size_t aug_len = 0)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g2(*this, *pk, *sig,\n                               scalar, nbits, msg, msg_len, aug, aug_len);\n    }\n#endif\n    void commit()\n    {   blst_pairing_commit(*this);   }\n    BLST_ERROR merge(const Pairing* ctx)\n    {   return blst_pairing_merge(*this, *ctx);   }\n    bool finalverify(const PT* sig = nullptr) const\n    {   return sig == nullptr ? blst_pairing_finalverify(*this, nullptr)\n                              : blst_pairing_finalverify(*this, *sig);\n    }\n    void raw_aggregate(const P2_Affine* q, const P1_Affine* p)\n    {   blst_pairing_raw_aggregate(*this, *q, *p);   }\n    PT as_fp12()\n    {   return PT(blst_pairing_as_fp12(*this));   }\n};\n\n} // namespace blst\n\n#endif\n"
  },
  {
    "path": "bindings/blst.swg",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\n%module blst\n%rename(\"%(strip:[blst_])s\") \"\";    // prefix is redundant in named module\n\n%include \"exception.i\"\n#ifdef __cplusplus\n%include \"std_string.i\"\n%typemap(out) SELF* OUTPUT = SWIGTYPE*; // to be overridden as required\n#else\n#warning consider using C++ interface\n#endif\n%include \"stdint.i\"\n\n%apply const char* { const byte*, const byte[ANY] }\n%apply (const char *STRING, size_t LENGTH) { (const byte *STRING,\n                                              size_t LENGTH) }\n\n#if defined(SWIGPYTHON)\n\n%header %{\n#if PY_VERSION_HEX<0x030d0000\n/* Tailored polyfill, for example no need to handle |n_bytes| == 0 here */\nstatic Py_ssize_t PyLong_AsNativeBytes(PyObject* v, void* buffer,\n                                       Py_ssize_t n_bytes, int flags)\n{\n    return _PyLong_AsByteArray((PyLongObject*)v,\n                               (unsigned char*)buffer, n_bytes,\n                               flags&1, (flags&4) == 0) < 0 ? 
-1 : n_bytes;\n}\n# define My_PYLONG_FLAGS (1 | 4 | 8)\n#else\n# define My_PYLONG_FLAGS (Py_ASNATIVEBYTES_LITTLE_ENDIAN | \\\n                          Py_ASNATIVEBYTES_UNSIGNED_BUFFER | \\\n                          Py_ASNATIVEBYTES_REJECT_NEGATIVE)\n#endif\n#if PY_VERSION_HEX<0x030e0000\nstatic int PyLong_GetSign(PyObject *obj, int *sign)\n{\n    if (!PyLong_Check(obj))\n        return -1;\n    *sign = _PyLong_Sign(obj);\n    return 0;\n}\n#endif\n%}\n\n// some sorcery to allow assignments as output, e.g.\n//      hash = blst.encode_to_g1(b\"foo\")\n%typemap(in, numinputs=0) OBJECT *OUTPUT($1_basetype temp) %{ $1 = &temp; %}\n%typemap(argout) OBJECT *OUTPUT {\n    PyObject *obj = SWIG_NewPointerObj(memcpy(malloc(sizeof($1_basetype)),\n                                              $1,sizeof($1_basetype)),\n                                       $descriptor, SWIG_POINTER_NEW);\n    $result = SWIG_AppendOutput($result, obj);\n}\n%apply OBJECT *OUTPUT {\n    blst_p1        *out, blst_p1        *out_pk, blst_p1        *out_sig,\n    blst_p1_affine *out, blst_p1_affine *out_pk, blst_p1_affine *out_sig,\n    blst_p2        *out, blst_p2        *out_pk, blst_p2        *out_sig,\n    blst_p2_affine *out, blst_p2_affine *out_pk, blst_p2_affine *out_sig,\n    blst_scalar    *out, blst_scalar    *out_SK,\n    blst_fp12      *out\n}\n\n// accept 'bytes' and 'bytearray' as inputs...\n%typemap(in) const byte* %{\n    if ($input == Py_None) {\n        $1 = NULL;\n    } else if (PyBytes_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname'\");\n\n        $1 = ($1_ltype)buf;\n    } else if (PyByteArray_Check($input)) {\n        $1 = ($1_ltype)PyByteArray_AsString($input);\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting 'bytes' or 
'bytearray'\");\n    }\n%}\n%typemap(freearg) const byte* \"\"\n\n%typemap(in) const byte[ANY] %{\n    if (PyBytes_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname'\");\n\n        if (nbytes != $1_dim0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname', \"\n                                                 \"expecting $1_dim0 bytes\");\n        $1 = ($1_ltype)buf;\n    } else if (PyByteArray_Check($input)) {\n        if (PyByteArray_Size($input) != $1_dim0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname', \"\n                                                 \"expecting $1_dim0 bytes\");\n        $1 = ($1_ltype)PyByteArray_AsString($input);\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting 'bytes' or 'bytearray'\");\n    }\n%}\n%typemap(freearg) const byte[ANY] \"\"\n\n%typemap(in) (const byte *STRING, size_t LENGTH) %{\n    if ($input == Py_None) {\n        $1 = NULL;\n        $2 = 0;\n    } else if (PyBytes_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname'\");\n\n        $1 = ($1_ltype)buf;\n        $2 = nbytes;\n    } else if (PyByteArray_Check($input)) {\n        $1 = ($1_ltype)PyByteArray_AsString($input);\n        $2 = PyByteArray_Size($input);\n#ifdef Py_USING_UNICODE\n    } else if (PyUnicode_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n        PyObject *obj = PyUnicode_AsUTF8String($input);\n\n        if (obj == NULL || PyBytes_AsStringAndSize(obj, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname'\");\n\n        $1 = ($1_ltype)alloca($2 = nbytes);\n      
  memcpy($1, buf, $2);\n        Py_DECREF(obj);\n#endif\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting 'bytes' or 'bytearray'\");\n    }\n%}\n%typemap(freearg) (const byte *STRING, size_t LENGTH) \"\"\n\n%typemap(in) blst::bytes_t %{\n    if ($input == Py_None) {\n        $1.ptr = NULL;\n        $1.len = 0;\n    } else if (PyBytes_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname'\");\n\n        $1.ptr = (byte*)buf;\n        $1.len = nbytes;\n    } else if (PyByteArray_Check($input)) {\n        $1.ptr = (byte*)PyByteArray_AsString($input);\n        $1.len = PyByteArray_Size($input);\n#ifdef Py_USING_UNICODE\n    } else if (PyUnicode_Check($input)) {\n        char *buf;\n        Py_ssize_t nbytes;\n        PyObject *obj = PyUnicode_AsUTF8String($input);\n\n        if (obj == NULL || PyBytes_AsStringAndSize(obj, &buf, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_ValueError, \"in method '$symname'\");\n\n        auto ptr = alloca(nbytes);\n        memcpy(ptr, buf, nbytes);\n        $1.ptr = (byte*)ptr;\n        $1.len = nbytes;\n        Py_DECREF(obj);\n#endif\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting 'bytes' or 'bytearray'\");\n    }\n%}\n%typemap(freearg)   blst::bytes_t \"\"\n%typemap(typecheck) blst::bytes_t \"\"\n\n// let users use Python 'int', 'bytes' and 'bytearray' as scalars\n%typemap(in) (const byte* scalar, size_t nbits) %{\n    if (PyBytes_Check($input)) {\n        char *scalar;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &scalar, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname'\");\n\n        $1 = ($1_ltype)scalar;\n        $2 = 8 * 
nbytes;\n    } else if (PyByteArray_Check($input)) {\n        $1 = ($1_ltype)PyByteArray_AsString($input);\n        $2 = 8 * PyByteArray_Size($input);\n    } else if (PyLong_Check($input)) {\n        size_t nbytes;\n\n        $2 = _PyLong_NumBits($input);\n        $1 = ($1_ltype)alloca(nbytes = ($2 + 7)/8);\n\n        if (PyLong_AsNativeBytes($input, $1, nbytes, My_PYLONG_FLAGS) < 0)\n            SWIG_exception_fail(SWIG_OverflowError, \"in method '$symname'\");\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting 'int', 'bytes' \"\n                                            \"or 'bytearray'\");\n    }\n%}\n\n#ifdef __cplusplus\n%typemap(in) (const POINT* points[], size_t npoints)\n    (std::unique_ptr<$*1_ltype[]> points, size_t _global_npoints) %{\n    if (PyList_Check($input)) {\n        _global_npoints = PyList_Size($input);\n        points = std::unique_ptr<$*1_ltype[]>(new $*1_ltype[_global_npoints]);\n        PyObject* obj = PyList_GET_ITEM($input, 0);\n        // check the type of the 1st element\n        if (SWIG_ConvertPtr(obj, (void**)&points[0], $*1_descriptor, 0) != SWIG_OK)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                \"expecting 'list' of '$*1_ltype'\");\n        for (size_t i = 1; i < _global_npoints; i++) {\n            obj = PyList_GET_ITEM($input, i);\n            points[i] = ($*1_ltype)SWIG_Python_GetSwigThis(obj)->ptr;\n        }\n        $1 = points.get();\n        $2 = _global_npoints;\n    } else if (PyBytes_Check($input)) {\n        char *bytes;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &bytes, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname'\");\n\n        points = std::unique_ptr<$*1_ltype[]>(new $*1_ltype[2]);\n        points[0] = ($*1_ltype)bytes;\n        points[1] = nullptr;\n        $1 = 
points.get();\n        $2 = _global_npoints = nbytes / sizeof(points[0][0]);\n    } else if (PyMemoryView_Check($input)) {    // output from to_affine()\n        Py_buffer *buf = PyMemoryView_GET_BUFFER($input);\n\n        if (!PyBytes_Check(buf->obj))\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                \"expecting 'bytes' in \"\n                                                \"'memoryview'  for points[]\");\n        points = std::unique_ptr<$*1_ltype[]>(new $*1_ltype[2]);\n        points[0] = ($*1_ltype)buf->buf;\n        points[1] = nullptr;\n        $1 = points.get();\n        $2 = _global_npoints = buf->len / sizeof(points[0][0]);\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', expecting \"\n                                            \"'list', 'bytes' or 'memoryview' \"\n                                            \"for points[]\");\n    }\n%}\n%apply (const POINT* points[], size_t npoints) {\n       (const blst::P1_Affine* const points[], size_t npoints),\n       (const blst::P2_Affine* const points[], size_t npoints),\n       (const blst::P1* const points[], size_t npoints),\n       (const blst::P2* const points[], size_t npoints)\n}\n\n%typemap(in, numinputs=0) POINT points[] (PyObject *obj) \"\"\n%typemap(check) POINT points[] {\n    char *bytes;\n    Py_ssize_t size = sizeof($1[0]) * _global_npoints;\n\n    obj$argnum = PyBytes_FromStringAndSize(NULL, size);\n    if (obj$argnum == NULL) SWIG_fail;\n    PyBytes_AsStringAndSize(obj$argnum, &bytes, &size);\n    $1 = ($1_ltype)bytes;\n}\n%typemap(argout) POINT points[] %{\n    $result = PyMemoryView_FromObject(obj$argnum);\n    if ($result != NULL) {\n        // .itemsize to return size of point, and len() - amount of points\n        PyMemoryView_GET_BUFFER($result)->itemsize  = sizeof($1[0]);\n        PyMemoryView_GET_BUFFER($result)->shape[0] /= sizeof($1[0]);\n    } else {\n        
Py_DECREF(obj$argnum);\n    }\n%}\n%apply POINT points[] { blst_p1_affine dst[], blst_p2_affine dst[] }\n\n%extend blst::P1_Affines {\n    static PyObject* as_memory(blst_p1_affine dst[],\n                               const blst::P1* const points[], size_t npoints)\n    {   blst_p1s_to_affine(dst, (const blst_p1 *const*)points, npoints);\n        return Py_None; // ignored by 'argout' typemap above\n    }\n}\n%extend blst::P2_Affines {\n    static PyObject* as_memory(blst_p2_affine dst[],\n                               const blst::P2* const points[], size_t npoints)\n    {   blst_p2s_to_affine(dst, (const blst_p2 *const*)points, npoints);\n        return Py_None; // ignored by 'argout' typemap above\n    }\n}\n%nodefault blst::P1_Affines;\n%nodefault blst::P2_Affines;\n\n%typemap(in) (const byte* const scalars[], size_t nbits)\n    (std::unique_ptr<byte[]> bytes, byte *scalars[2]) %{\n    if (PyList_Check($input)) {\n        if ((size_t)PyList_Size($input) != _global_npoints)\n            SWIG_exception_fail(SWIG_IndexError, \"in method '$symname', 'list' \"\n                                                 \"length mismatch for scalars[]\");\n\n        PyObject *obj = PyList_GET_ITEM($input, 0);\n        if (PyLong_Check(obj)) {\n            $2 = _PyLong_NumBits(obj);\n            for (size_t i = 1; i < _global_npoints; i++) {\n                size_t nbits;\n                int sign;\n                obj = PyList_GET_ITEM($input, i);\n                if (PyLong_GetSign(obj, &sign) < 0 || sign < 0)\n                    SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                        \"expecting all 'long's\");\n                nbits = _PyLong_NumBits(obj);\n                if (nbits > $2) $2 = nbits;\n            }\n\n            size_t nbytes = ($2 + 7)/8;\n            bytes = std::unique_ptr<byte[]>(new byte[_global_npoints*nbytes]);\n            byte* scalar = bytes.get();\n            for (size_t i = 0; 
i < _global_npoints; i++, scalar += nbytes)\n                PyLong_AsNativeBytes(PyList_GET_ITEM($input, i),\n                                     scalar, nbytes, My_PYLONG_FLAGS);\n\n            scalars[0] = bytes.get();\n            scalars[1] = nullptr;\n            $1 = scalars;\n        } else {\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                \"expecting 'list' of 'long's \"\n                                                \"for scalars[]\");\n        }\n    } else if (PyBytes_Check($input)) {\n        char *bytes;\n        Py_ssize_t nbytes;\n\n        if (PyBytes_AsStringAndSize($input, &bytes, &nbytes) < 0)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname'\");\n\n        scalars[0] = ($*1_ltype)bytes;\n        scalars[1] = nullptr;\n        $1 = scalars;\n        $2 = 8 * (nbytes / _global_npoints);\n    } else if (PyByteArray_Check($input)) {\n        scalars[0] = ($*1_ltype)PyByteArray_AsString($input);\n        scalars[1] = nullptr;\n        $1 = scalars;\n        $2 = 8 * (PyByteArray_Size($input) / _global_npoints);\n    } else if (PyMemoryView_Check($input)) {\n        Py_buffer *buf = PyMemoryView_GET_BUFFER($input);\n\n        if (!PyBytes_Check(buf->obj) && !PyByteArray_Check(buf->obj))\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                \"expecting 'bytes' in \"\n                                                \"'memoryview'  for points[]\");\n        scalars[0] = ($*1_ltype)buf->buf;\n        scalars[1] = nullptr;\n        $1 = scalars;\n        $2 = 8 * (buf->len / _global_npoints);\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', expecting \"\n                                            \"'list', 'bytes', 'bytearray' \"\n                                            \"or 'memoryview' for scalars[]\");\n    }\n%}\n\n%typemap(out) BLST_ERROR 
%{\n    if ($1 != BLST_SUCCESS) {\n        SWIG_exception(SWIG_ValueError, BLST_ERROR_str[$1]);\n        SWIG_fail;\n    }\n    $result = SWIG_From_int($1);\n%}\n\n// return |this|\n%typemap(out) SELF* OUTPUT %{ (void)$1; Py_INCREF($result = swig_obj[0]); %}\n#endif\n\n#elif defined(SWIGJAVA)\n\n%header %{\n#ifdef __cplusplus\n# define JCALL(func, ...) jenv->func(__VA_ARGS__)\n#else\n# define JCALL(func, ...) (*jenv)->func(jenv, __VA_ARGS__)\n#endif\n%}\n\n%include \"enums.swg\"\n%include \"arrays_java.i\"\n%javaconst(1);\n\n#if SWIG_VERSION < 0x040000\n%apply (char *STRING, size_t LENGTH) { (const byte *STRING, size_t LENGTH) }\n#endif\n\n%pragma(java) jniclassimports=%{\nimport java.io.*;\nimport java.nio.file.*;\n%}\n%pragma(java) jniclasscode=%{\n    final static String libName = System.mapLibraryName(\"$module\");\n    final static String resName = System.getProperty(\"os.name\").replaceFirst(\" .*\",\"\")\n                                + \"/\" + System.getProperty(\"os.arch\")\n                                + \"/\" + libName;\n    static {\n        Class<?> imClazz = $imclassname.class;\n        InputStream res = imClazz.getResourceAsStream(\n                        System.getProperty(imClazz.getPackageName() + \".jniResource\", resName));\n        if (res == null) {\n            try {\n                System.loadLibrary(\"$module\");\n            } catch (UnsatisfiedLinkError e) {\n                String[] cmd = System.getProperty(\"sun.java.command\").split(\"/\");\n                if (!\"$imclassname\".equals(cmd[cmd.length-1]))\n                    // suppress exception if 'main' below is executed\n                    throw new RuntimeException(e.getMessage());\n            }\n        } else {\n            // unpack shared library into a temporary directory and load it\n            try {\n                Path tmpdir = Files.createTempDirectory(\"$module@\");\n                tmpdir.toFile().deleteOnExit();\n                Path tmpdll = 
Paths.get(tmpdir.toString(), libName);\n                tmpdll.toFile().deleteOnExit();\n                Files.copy(res, tmpdll, StandardCopyOption.REPLACE_EXISTING);\n                res.close();\n                System.load(tmpdll.toString());\n            } catch (IOException e) {\n                throw new RuntimeException(e.getMessage());\n            }\n        }\n    }\n    public static void main(String argv[]) {\n        System.out.println(resName);\n    }\n%}\n\n#ifdef __cplusplus\n// Extensive sorcery to shift memory management to JVM GC. General idea is\n// to use Java long[] as opaque storage for blst data. Methods that return\n// new objects allocate suitably sized long[] arrays from JVM heap,\n// references to which are then assigned to |swigCPtr| on the Java side.\n// And when passed back to JNI, |swigCPtr|s are dereferenced with\n// GetLongArrayElements... And no destructors!\n%nodefaultdtor;\n%typemap(javafinalize)  SWIGTYPE \"\"\n%typemap(javadestruct)  SWIGTYPE \"\"\n\n%typemap(javabody)      SWIGTYPE %{\n  private transient long[] swigCPtr;\n\n  protected $javaclassname(long[] cPtr) { swigCPtr = cPtr; }\n\n  protected static long[] getCPtr($javaclassname obj) {\n    return obj != null ? obj.swigCPtr : null;\n  }\n\n  public $javaclassname dup() { return new $javaclassname(swigCPtr.clone()); }\n%}\n%ignore dup;\n%typemap(javaconstruct) SWIGTYPE { this($imcall); }\n%typemap(jni)           SWIGTYPE, SWIGTYPE&, SWIGTYPE* \"jlongArray\"\n%typemap(jtype)         SWIGTYPE, SWIGTYPE&, SWIGTYPE* \"long[]\"\n%typemap(javaout)       SWIGTYPE, SWIGTYPE&, SWIGTYPE* {\n    return new $javaclassname($jnicall);\n}\n%typemap(in)            SWIGTYPE&, SWIGTYPE* %{\n    $1 = ($1_ltype)JCALL(GetLongArrayElements, $input, 0);\n%}\n%typemap(in)      const SWIGTYPE&, const SWIGTYPE* %{\n    $1 = $input ? 
($1_ltype)JCALL(GetLongArrayElements, $input, 0) : NULL;\n%}\n%typemap(out)           SWIGTYPE&, SWIGTYPE* %{\n    if ($1 != $null) {\n        size_t sz = (sizeof($1_basetype) + sizeof(jlong) - 1)/sizeof(jlong);\n        $result = JCALL(NewLongArray, sz);\n        if ($result != $null)\n            JCALL(SetLongArrayRegion, $result, 0, sz, (const jlong *)$1);\n    }\n%}\n%typemap(out)           SWIGTYPE {\n    size_t sz = (sizeof($1_basetype) + sizeof(jlong) - 1)/sizeof(jlong);\n    $result = JCALL(NewLongArray, sz);\n    if ($result != $null)\n        JCALL(SetLongArrayRegion, $result, 0, sz, (const jlong *)&$1);\n}\n%typemap(newfree)       SWIGTYPE* \"delete $1;\"\n%typemap(freearg)       SWIGTYPE&, SWIGTYPE* %{\n    JCALL(ReleaseLongArrayElements, $input, (jlong *)$1, 0);\n%}\n%typemap(freearg) const SWIGTYPE&, const SWIGTYPE* %{\n    if ($input) JCALL(ReleaseLongArrayElements, $input, (jlong *)$1, JNI_ABORT);\n%}\n%typemap(freearg) const std::string& \"\"\n\n// I wish |jenv| was available in the constructor, so that NewLongArray\n// could be called at once, without having to resort to matching\n// %typemap(out)...\n%extend blst::Pairing {\n    Pairing(bool hash_or_encode, const std::string& DST)\n    {   size_t sz = blst_pairing_sizeof();\n        size_t SZ = (sz + DST.size() + sizeof(jlong) - 1)/sizeof(jlong);\n        blst_pairing *ret = (blst_pairing *)malloc(SZ*sizeof(jlong));\n        if (DST.size() != 0) {\n            byte *dst = (byte *)ret + sz;\n            memcpy(dst, DST.data(), DST.size());\n            blst_pairing_init(ret, hash_or_encode, dst, DST.size());\n        } else {\n            blst_pairing_init(ret, hash_or_encode, NULL, 0);\n        }\n        return (Pairing *)ret;\n    }\n}\n%typemap(out) blst::Pairing* {\n    size_t sz = blst_pairing_sizeof();\n    size_t SZ = (sz + arg2->size() + sizeof(jlong) - 1)/sizeof(jlong);\n    $result = JCALL(NewLongArray, SZ);\n    if ($result != $null)\n        JCALL(SetLongArrayRegion, $result, 0, SZ, 
(const jlong *)$1);\n}\n%typemap(newfree) blst::Pairing* \"free($1);\"\n\n%typemap(javaout) SELF* OUTPUT { $jnicall; return this; }\n%typemap(out)     SELF* OUTPUT \"(void)$1;\"\n%typemap(jni)     SELF* OUTPUT \"void\"\n%typemap(jtype)   SELF* OUTPUT \"void\"\n#endif\n\n%typemap(throws) BLST_ERROR %{\n    SWIG_JavaThrowException(jenv, SWIG_JavaRuntimeException,\n                                  BLST_ERROR_str[$1]);\n%}\n\n// handle input const byte[] more efficiently...\n%apply signed char[] { const byte* }\n%typemap(in) const byte* %{\n    $1 = $input ? ($1_ltype)JCALL(GetByteArrayElements, $input, 0) : NULL;\n%}\n%typemap(argout)  const byte* \"\"\n%typemap(freearg) const byte* %{\n    if ($input) JCALL(ReleaseByteArrayElements, $input, (jbyte *)$1, JNI_ABORT);\n%}\n\n%apply const byte* { const byte[ANY] }\n%typemap(in) const byte[ANY] {\n    size_t sz = JCALL(GetArrayLength, $input);\n    if (sz != $1_dim0) {\n        SWIG_JavaThrowException(jenv, SWIG_JavaIndexOutOfBoundsException,\n                                      \"BLST_ERROR: input size mismatch\");\n        return $null;\n    }\n    $1 = ($1_ltype)JCALL(GetByteArrayElements, $input, 0);\n}\n\n// let users use 'java.math.BigInteger' as scalars\n%typemap(in) (const byte* scalar, size_t nbits) %{\n    $2 = JCALL(GetArrayLength, $input);\n    $1 = ($1_ltype)alloca($2);\n    JCALL(GetByteArrayRegion, $input, 0, $2, (jbyte*)$1);\n    if (*(jbyte*)$1 < 0) {\n        SWIG_JavaThrowException(jenv, SWIG_JavaIllegalArgumentException,\n                                      \"expecting unsigned value\");\n        return $null;\n    }\n    {   // BigInteger.toByteArray() emits big-endian, flip the order...\n        size_t i, j;\n        for(i=0, j=$2-1; i<$2/2; i++, j--) {\n            $*1_ltype t=$1[i]; $1[i]=$1[j]; $1[j]=t;\n        }\n    }\n    if ($1[$2-1] == 0)\n        $2--;\n    $2 *= 8;\n%}\n%typemap(jni)    (const byte* scalar, size_t nbits) \"jbyteArray\"\n%typemap(jtype)  (const byte* scalar, size_t 
nbits) \"byte[]\"\n%typemap(jstype) (const byte* scalar, size_t nbits) \"java.math.BigInteger\"\n%typemap(javain) (const byte* scalar, size_t nbits) \"$javainput.toByteArray()\"\n\n%typemap(jni)    (const byte *STRING, size_t LENGTH) \"jbyteArray\"\n%typemap(jtype)  (const byte *STRING, size_t LENGTH) \"byte[]\"\n%typemap(jstype) (const byte *STRING, size_t LENGTH) \"byte[]\"\n%typemap(javain) (const byte *STRING, size_t LENGTH) \"$javainput\"\n%typemap(freearg)(const byte *STRING, size_t LENGTH) \"\"\n\n%typemap(jni)    blst::bytes_t \"jbyteArray\"\n%typemap(jtype)  blst::bytes_t \"byte[]\"\n%typemap(jstype) blst::bytes_t \"byte[]\"\n%typemap(javain) blst::bytes_t \"$javainput\"\n%typemap(freearg)blst::bytes_t \"\"\n%typemap(in)     blst::bytes_t %{\n    $1.ptr = (const byte*)JCALL(GetByteArrayElements, $input, 0);\n    $1.len = JCALL(GetArrayLength, $input);\n%}\n%typemap(argout) blst::bytes_t  %{\n    JCALL(ReleaseByteArrayElements, $input, (jbyte *)$1.ptr, JNI_ABORT);\n%}\n\n#elif defined(SWIGJAVASCRIPT) && defined(SWIG_JAVASCRIPT_V8)\n\n%header %{\n#if V8_MAJOR_VERSION >= 8\n# define GetData() GetBackingStore()->Data()\n#else\n# define GetData() GetContents().Data()\n#endif\n%}\n\n%typemap(throws) BLST_ERROR %{ SWIG_V8_Raise(BLST_ERROR_str[$1]); SWIG_fail; %}\n\n%typemap(in) const byte* %{\n    if ($input->IsArrayBufferView()) {\n        auto av = v8::Local<v8::ArrayBufferView>::Cast($input);\n        auto buf = av->Buffer();\n        $1 = ($1_ltype)buf->GetData() + av->ByteOffset();\n    } else if ($input->IsNull()) {\n        $1 = nullptr;\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting <Buffer>\");\n    }\n%}\n%typemap(argout)  const byte* \"\"\n%typemap(freearg) const byte* \"\"\n\n%apply const byte* { const byte[ANY] }\n%typemap(in) const byte[ANY] %{\n    if ($input->IsArrayBufferView()) {\n        auto av = v8::Local<v8::ArrayBufferView>::Cast($input);\n       
 if (av->ByteLength() != $1_dim0)\n            SWIG_exception_fail(SWIG_IndexError, \"in method '$symname', \"\n                                                 \"expecting $1_dim0 bytes\");\n        auto buf = av->Buffer();\n        $1 = ($1_ltype)buf->GetData() + av->ByteOffset();\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting <Buffer>\");\n    }\n%}\n\n// let users use JavaScript <BigInt> and <Buffer> as scalars\n%typemap(in) (const byte* scalar, size_t nbits) %{\n    if ($input->IsArrayBufferView()) {\n        auto av = v8::Local<v8::ArrayBufferView>::Cast($input);\n        auto buf = av->Buffer();\n        $1 = ($1_ltype)buf->GetData() + av->ByteOffset();\n        $2 = 8*av->ByteLength();\n#if V8_MAJOR_VERSION >=6 && V8_MINOR_VERSION >= 8\n    } else if ($input->IsBigInt()) {\n        auto bi = v8::Local<v8::BigInt>::Cast($input);\n        int sign, word_count = bi->WordCount();\n        uint64_t* words = (uint64_t*)alloca($2 = word_count*sizeof(uint64_t));\n\n        bi->ToWordsArray(&sign, &word_count, words);\n        if (sign)\n            SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                                \"expecting unsigned value\");\n        $1 = ($1_ltype)words;\n        $2 *= 8;\n\n        const union {\n            long one;\n            char little;\n        } is_endian = { 1 };\n\n        if (!is_endian.little) {\n            byte* p = $1;\n            for (int i = 0; i < word_count; i++) {\n                uint64_t val = words[i];\n                for (size_t j = 0; j < sizeof(val); j++, val >>= 8)\n                    *p++ = (byte)val;\n            }\n        }\n#endif\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting <Buffer> or <BigInt>\");\n    }\n%}\n\n%typemap(in) (const byte *STRING, size_t LENGTH) %{\n    if 
($input->IsArrayBufferView()) {\n        auto av = v8::Local<v8::ArrayBufferView>::Cast($input);\n        auto buf = av->Buffer();\n        $1 = ($1_ltype)buf->GetData() + av->ByteOffset();\n        $2 = av->ByteLength();\n    } else if ($input->IsString()) {\n        auto str = v8::Local<v8::String>::Cast($input);\n        $2 = SWIGV8_UTF8_LENGTH(str);\n        $1 = ($1_ltype)alloca($2);\n        SWIGV8_WRITE_UTF8(str, (char *)$1, $2);\n    } else if ($input->IsNull()) {\n        $1 = nullptr;\n        $2 = 0;\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting <Buffer> or <String>\");\n    }\n%}\n%typemap(freearg) (const byte *STRING, size_t LENGTH) \"\"\n\n%typemap(in) blst::bytes_t %{\n    if ($input->IsArrayBufferView()) {\n        auto av = v8::Local<v8::ArrayBufferView>::Cast($input);\n        auto buf = av->Buffer();\n        $1.ptr = (byte*)buf->GetData() + av->ByteOffset();\n        $1.len = av->ByteLength();\n    } else if ($input->IsString()) {\n        auto str = v8::Local<v8::String>::Cast($input);\n        $1.len = SWIGV8_UTF8_LENGTH(str);\n        $1.ptr = (byte*)alloca($1.len);\n        SWIGV8_WRITE_UTF8(str, (char *)$1.ptr, $1.len);\n    } else if ($input->IsNull()) {\n        $1.ptr = nullptr;\n        $1.len = 0;\n    } else {\n        SWIG_exception_fail(SWIG_TypeError, \"in method '$symname', \"\n                                            \"expecting <Buffer> or <String>\");\n    }\n%}\n%typemap(freearg) blst::bytes_t \"\"\n\n// return |this|\n%typemap(out) SELF* OUTPUT %{ (void)$1; $result = args.Holder(); %}\n\n#elif defined(SWIGPERL)\n\n// let users use byte[] as scalars\n%apply (const char *STRING, size_t LENGTH) { (const byte* scalar, size_t nbits) }\n%typemap(check) (const byte* scalar, size_t nbits) %{ $2 *= 8; %}\n\n#ifdef __cplusplus\n// return |this|\n%typemap(out) SELF* OUTPUT %{ (void)$1; argvi++; %}\n#endif\n\n#endif  // 
SWIG<language>\n\n// everybody has a way to bundle pointer and buffer size, but C:-(\n%apply (const byte *STRING, size_t LENGTH) {\n       (const byte *msg,    size_t msg_len),\n       (const byte *DST,    size_t DST_len),\n       (const byte *aug,    size_t aug_len),\n       (const byte *IKM,    size_t IKM_len),\n       (const byte *info,   size_t info_len),\n       (const byte *salt,   size_t salt_len),\n       (const byte *in,     size_t len)\n}\n\n// some sorcery to return byte[] from serialization methods\n%typemap(in, numinputs=0) byte out[ANY] (byte temp[$1_dim0]) %{ $1 = temp; %}\n%typemap(argout) byte out[ANY] {\n#if defined(SWIGPYTHON)\n    PyObject *obj = SWIG_FromCharPtrAndSize((char *)$1, $1_dim0);\n    $result = SWIG_AppendOutput($result, obj);\n#elif defined(SWIGJAVA)\n    $result = JCALL(NewByteArray, $1_dim0);\n    if ($result != $null) {\n        JCALL(SetByteArrayRegion, $result, 0, $1_dim0, (const jbyte *)$1);\n    }\n#elif defined(SWIGJAVASCRIPT) && defined(SWIG_JAVASCRIPT_V8)\n    auto ab = v8::ArrayBuffer::New(v8::Isolate::GetCurrent(), $1_dim0);\n    memcpy(ab->GetData(), $1, $1_dim0);\n    $result = v8::Uint8Array::New(ab, 0, $1_dim0);\n#elif defined(SWIGPERL)\n    $result = SWIG_FromCharPtrAndSize((char *)$1, $1_dim0); argvi++;\n#else // TODO: figure out more language-specific ways to return multi-values...\n    if ($result == NULL)\n        $result = SWIG_FromCharPtrAndSize((char *)$1, $1_dim0);\n#endif\n}\n%typemap(freearg) byte out[ANY] \"\"\n#ifdef SWIGJAVA\n%typemap(jni)     byte out[ANY] \"jbyteArray\"\n%typemap(jtype)   byte out[ANY] \"byte[]\"\n%typemap(jstype)  byte out[ANY] \"byte[]\"\n%typemap(javaout) byte out[ANY] { return $jnicall; }\n#endif\n%apply byte out[ANY] {\n    void to_bendian,    void blst_bendian_from_scalar,\n    void to_lendian,    void blst_lendian_from_scalar,\n    void serialize,     void blst_p1_serialize, void blst_p1_affine_serialize,\n                        void blst_p2_serialize, void 
blst_p2_affine_serialize,\n    void compress,      void blst_p1_compress,  void blst_p1_affine_compress,\n                        void blst_p2_compress,  void blst_p2_affine_compress,\n    void blst_sk_to_pk2_in_g1,  void blst_sign_pk2_in_g1,\n    void blst_sk_to_pk2_in_g2,  void blst_sign_pk2_in_g2\n}\n\n#ifdef __cplusplus\n%apply const std::string& { const std::string* }\n\n#pragma SWIG nowarn=509,516\n\n#if !defined(SWIGPYTHON)\n%ignore P1_Affines;\n%ignore P2_Affines;\n#endif\n\n%ignore nullptr;\n%ignore None;\n%ignore C_bytes;\n%ignore bytes_t;\n%feature(\"novaluewrapper\") bytes_t;\n%catches(BLST_ERROR) P1(const byte* in, size_t len);\n%catches(BLST_ERROR) P1_Affine(const byte* in, size_t len);\n%catches(BLST_ERROR) aggregate(const P1_Affine& in);\n\n%catches(BLST_ERROR) P2(const byte* in, size_t len);\n%catches(BLST_ERROR) P2_Affine(const byte* in, size_t len);\n%catches(BLST_ERROR) aggregate(const P2_Affine& in);\n\n%catches(BLST_ERROR) blst::Scalar::add;\n%catches(BLST_ERROR) blst::Scalar::sub;\n%catches(BLST_ERROR) blst::Scalar::mul;\n\n// methods returning |this|\n%apply SELF* OUTPUT {\n    blst::P1* sign_with,    blst::P2* sign_with,\n    blst::P1* hash_to,      blst::P2* hash_to,\n    blst::P1* encode_to,    blst::P2* encode_to,\n    blst::P1* mult,         blst::P2* mult,\n    blst::P1* cneg,         blst::P2* cneg,\n    blst::P1* neg,          blst::P2* neg,\n    blst::P1* add,          blst::P2* add,\n    blst::P1* dbl,          blst::P2* dbl,\n    blst::PT* mul,          blst::PT* sqr,\n    blst::PT* final_exp,\n    blst::Scalar* from_bendian,\n    blst::Scalar* from_lendian,\n    blst::Scalar* add,\n    blst::Scalar* sub,\n    blst::Scalar* mul,\n    blst::Scalar* inverse\n}\n\ntypedef enum {\n    BLST_SUCCESS = 0,\n    BLST_BAD_ENCODING,\n    BLST_POINT_NOT_ON_CURVE,\n    BLST_POINT_NOT_IN_GROUP,\n    BLST_AGGR_TYPE_MISMATCH,\n    BLST_VERIFY_FAIL,\n    BLST_PK_IS_INFINITY,\n} BLST_ERROR;\n\n%include \"blst.hpp\"\n\nextern const blst::P1_Affine 
BLS12_381_G1;\nextern const blst::P1_Affine BLS12_381_NEG_G1;\nextern const blst::P2_Affine BLS12_381_G2;\nextern const blst::P2_Affine BLS12_381_NEG_G2;\n\n#else\n%ignore blst_fr;\n%ignore blst_fp;\n%ignore blst_fp2;\n%ignore blst_fp6;\n%ignore blst_scalar_from_uint32;\n%ignore blst_scalar_from_uint64;\n%ignore blst_uint32_from_scalar;\n%ignore blst_uint64_from_scalar;\n%ignore blst_pairing_init;\n%ignore blst_pairing_get_dst;\n\n%include \"blst.h\"\n%include \"blst_aux.h\"\n%extend blst_pairing {\n    blst_pairing(bool hash_or_encode, const byte *DST DEFNULL,\n                                      size_t DST_len DEFNULL)\n    {   void *ret = malloc(blst_pairing_sizeof());\n        if (DST_len != 0) {\n            void *dst = malloc(DST_len);\n            memcpy(dst, DST, DST_len);\n            blst_pairing_init(ret, hash_or_encode, dst, DST_len);\n        } else {\n            blst_pairing_init(ret, hash_or_encode, NULL, 0);\n        }\n        return ret;\n    }\n    ~blst_pairing()\n    {   void *dst = (void *)blst_pairing_get_dst($self);\n        if (dst != NULL) free(dst);\n        free($self);\n    }\n}\n#endif\n\n%begin %{\n#ifdef __cplusplus\n# include <memory>\n# include \"blst.hpp\"\nusing namespace blst;\n#else\n# include \"blst.h\"\n#endif\n\nstatic const char *const BLST_ERROR_str [] = {\n    \"BLST_ERROR: success\",\n    \"BLST_ERROR: bad point encoding\",\n    \"BLST_ERROR: point is not on curve\",\n    \"BLST_ERROR: point is not in group\",\n    \"BLST_ERROR: context type mismatch\",\n    \"BLST_ERROR: verify failed\",\n    \"BLST_ERROR: public key is infinite\",\n};\n\n#define SWIG_PYTHON_STRICT_BYTE_CHAR\n\n#if defined(__GNUC__)\n# ifndef alloca\n#  define alloca(s) __builtin_alloca(s)\n# endif\n#elif defined(__sun)\n# include <alloca.h>\n#elif defined(_WIN32)\n# include <malloc.h>\n# ifndef alloca\n#  define alloca(s) _alloca(s)\n# endif\n#endif\n%}\n\n#if defined(SWIGPYTHON) || defined(SWIGPERL)\n%include \"cdata.i\"\n#endif\n\n#if SWIG_VERSION 
< 0x040100 && defined(SWIGJAVASCRIPT)\n%wrapper %{\n#ifdef NODE_MODULE\n# undef NODE_MODULE\n# define NODE_MODULE NODE_MODULE_CONTEXT_AWARE\n// actually error-prone and not exactly suitable for production, but\n// sufficient for development purposes till SWIG 4.1.0 is released...\n#endif\n%}\n#endif\n\n#if SWIG_VERSION < 0x040100 && defined(SWIGJAVA)\n/* SWIG versions prior to 4.1 were crossing the MinGW's ways on the path\n * to JNI 'jlong' type */\n%begin %{\n#if defined(__MINGW32__) && defined(__int64)\n# undef __int64\n#endif\n%}\n#endif\n"
  },
  {
    "path": "bindings/blst_aux.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLST_AUX_H__\n#define __BLST_AUX_H__\n/*\n * This file lists interfaces that might be promoted to blst.h or removed,\n * depending on their proven/unproven worthiness.\n */\n\nvoid blst_fr_ct_bfly(blst_fr *x0, blst_fr *x1, const blst_fr *twiddle);\nvoid blst_fr_gs_bfly(blst_fr *x0, blst_fr *x1, const blst_fr *twiddle);\nvoid blst_fr_to(blst_fr *ret, const blst_fr *a);\nvoid blst_fr_from(blst_fr *ret, const blst_fr *a);\n#ifdef BLST_FR_PENTAROOT\nvoid blst_fr_pentaroot(blst_fr *ret, const blst_fr *a);\nvoid blst_fr_pentapow(blst_fr *ret, const blst_fr *a);\n#endif\n\nvoid blst_fp_to(blst_fp *ret, const blst_fp *a);\nvoid blst_fp_from(blst_fp *ret, const blst_fp *a);\n\nbool blst_fp_is_square(const blst_fp *a);\nbool blst_fp2_is_square(const blst_fp2 *a);\n\nvoid blst_p1_from_jacobian(blst_p1 *out, const blst_p1 *in);\nvoid blst_p2_from_jacobian(blst_p2 *out, const blst_p2 *in);\n\n/*\n * Below functions produce both point and deserialized outcome of\n * SkToPk and Sign. However, deserialized outputs are pre-decorated\n * with sign and infinity bits. This means that you have to bring the\n * output into compliance prior returning to application. 
If you want\n * compressed point value, then do [equivalent of]\n *\n *  byte temp[96];\n *  blst_sk_to_pk2_in_g1(temp, out_pk, SK);\n *  temp[0] |= 0x80;\n *  memcpy(out, temp, 48);\n *\n * Otherwise do\n *\n *  blst_sk_to_pk2_in_g1(out, out_pk, SK);\n *  out[0] &= ~0x20;\n *\n * Either |out| or |out_<point>| can be NULL.\n */\nvoid blst_sk_to_pk2_in_g1(byte out[96], blst_p1_affine *out_pk,\n                          const blst_scalar *SK);\nvoid blst_sign_pk2_in_g1(byte out[192], blst_p2_affine *out_sig,\n                         const blst_p2 *hash, const blst_scalar *SK);\nvoid blst_sk_to_pk2_in_g2(byte out[192], blst_p2_affine *out_pk,\n                          const blst_scalar *SK);\nvoid blst_sign_pk2_in_g2(byte out[96], blst_p1_affine *out_sig,\n                         const blst_p1 *hash, const blst_scalar *SK);\n\n#ifdef __BLST_RUST_BINDGEN__\ntypedef struct {} blst_uniq;\n#else\ntypedef struct blst_opaque blst_uniq;\n#endif\n\nsize_t blst_uniq_sizeof(size_t n_nodes);\nvoid blst_uniq_init(blst_uniq *tree);\nbool blst_uniq_test(blst_uniq *tree, const byte *msg, size_t len);\n\n#ifdef expand_message_xmd\nvoid expand_message_xmd(unsigned char *bytes, size_t len_in_bytes,\n                        const unsigned char *aug, size_t aug_len,\n                        const unsigned char *msg, size_t msg_len,\n                        const unsigned char *DST, size_t DST_len);\n#else\nvoid blst_expand_message_xmd(byte *out, size_t out_len,\n                             const byte *msg, size_t msg_len,\n                             const byte *DST, size_t DST_len);\n#endif\n\nvoid blst_p1_unchecked_mult(blst_p1 *out, const blst_p1 *p, const byte *scalar,\n                                                            size_t nbits);\nvoid blst_p2_unchecked_mult(blst_p2 *out, const blst_p2 *p, const byte *scalar,\n                                                            size_t nbits);\n\nvoid blst_pairing_raw_aggregate(blst_pairing *ctx, const blst_p2_affine *q,\n   
                                                const blst_p1_affine *p);\nblst_fp12 *blst_pairing_as_fp12(blst_pairing *ctx);\nvoid blst_bendian_from_fp12(byte out[48*12], const blst_fp12 *a);\n\nvoid blst_keygen_v3(blst_scalar *out_SK, const byte *IKM, size_t IKM_len,\n                    const byte *info DEFNULL, size_t info_len DEFNULL);\nvoid blst_keygen_v4_5(blst_scalar *out_SK, const byte *IKM, size_t IKM_len,\n                      const byte *salt, size_t salt_len,\n                      const byte *info DEFNULL, size_t info_len DEFNULL);\nvoid blst_keygen_v5(blst_scalar *out_SK, const byte *IKM, size_t IKM_len,\n                    const byte *salt, size_t salt_len,\n                    const byte *info DEFNULL, size_t info_len DEFNULL);\nvoid blst_derive_master_eip2333(blst_scalar *out_SK,\n                                const byte *IKM, size_t IKM_len);\nvoid blst_derive_child_eip2333(blst_scalar *out_SK, const blst_scalar *SK,\n                               uint32_t child_index);\n\nvoid blst_scalar_from_hexascii(blst_scalar *out, const byte *hex);\nvoid blst_fr_from_hexascii(blst_fr *ret, const byte *hex);\nvoid blst_fp_from_hexascii(blst_fp *ret, const byte *hex);\n\nsize_t blst_p1_sizeof(void);\nsize_t blst_p1_affine_sizeof(void);\nsize_t blst_p2_sizeof(void);\nsize_t blst_p2_affine_sizeof(void);\nsize_t blst_fp12_sizeof(void);\n\nvoid blst_fp_from_le_bytes(blst_fp *ret, const byte *in, size_t len);\nvoid blst_fp_from_be_bytes(blst_fp *ret, const byte *in, size_t len);\n\n/*\n * Single-shot SHA-256 hash function.\n */\nvoid blst_sha256(byte out[32], const byte *msg, size_t msg_len);\n#endif\n"
  },
  {
    "path": "bindings/c#/poc.cs",
    "content": "using System;\nusing System.Text;\nusing supranational;\n\nclass PoC {\n  private static void Main(string[] args)\n  {\n    var msg = Encoding.UTF8.GetBytes(\"assertion\");\n    var DST = \"MY-DST\";\n\n    var SK = new blst.SecretKey();\n    SK.keygen(Encoding.UTF8.GetBytes(new string('*', 32)));\n\n    // generate public key and serialize it...\n    var pk_for_wire = new blst.P1(SK).serialize();\n\n    // sign |msg| and serialize the signature...\n    var sig_for_wire = new blst.P2().hash_to(msg, DST, pk_for_wire)\n                                    .sign_with(SK)\n                                    .serialize();\n\n    // now on \"receiving\" side, start with deserialization...\n    var _sig = new blst.P2_Affine(sig_for_wire);\n    var _pk = new blst.P1_Affine(pk_for_wire);\n    if (!_pk.in_group())\n        throw new blst.Exception(blst.ERROR.POINT_NOT_IN_GROUP);\n    var ctx = new blst.Pairing(true, DST);\n    var err = ctx.aggregate(_pk, _sig, msg, pk_for_wire);\n    if (err != blst.ERROR.SUCCESS)\n        throw new blst.Exception(err);\n    ctx.commit();\n    if (!ctx.finalverify())\n        throw new blst.Exception(blst.ERROR.VERIFY_FAIL);\n    Console.WriteLine(\"OK\");\n\n    // exercise .as_fp12 by performing equivalent of ctx.finalverify above\n    var C1 = new blst.PT(_sig);\n    var C2 = ctx.as_fp12();\n    if (!blst.PT.finalverify(C1, C2))\n        throw new blst.Exception(blst.ERROR.VERIFY_FAIL);\n\n    // test integers as scalar multiplicands\n    var p = blst.G1();\n    var q = p.dup().dbl().dbl().add(p);\n    if (!p.mult(5).is_equal(q))\n       throw new ApplicationException(\"disaster\");\n    if (!blst.G1().mult(-5).is_equal(q.neg()))\n       throw new ApplicationException(\"disaster\");\n\n    // low-order sanity check\n    var p11 = new blst.P1(fromHexString(\"80803f0d09fec09a95f2ee7495323c15c162270c7cceaffa8566e941c66bcf206e72955d58b3b32e564de3209d672ca5\"));\n    if (p11.in_group())\n       throw new 
ApplicationException(\"disaster\");\n    if (!p11.mult(11).is_inf())\n       throw new ApplicationException(\"disaster\");\n  }\n\n  private static int fromHexChar(char c)\n  {\n    if      (c>='0' && c<='9')  return c - '0';\n    else if (c>='a' && c<='f')  return c - 'a' + 10;\n    else if (c>='A' && c<='F')  return c - 'A' + 10;\n    throw new ArgumentOutOfRangeException(\"non-hex character\");\n  }\n\n  private static byte[] fromHexString(string str)\n  {\n    if (str.Length%2 != 0)\n        throw new ArgumentException(\"odd number of characters in hex string\");\n\n    char[] hex = str.ToCharArray();\n    byte[] ret = new byte[hex.Length/2];\n\n    for (int i=0; i<hex.Length; i+=2)\n        ret[i/2] = (byte)(fromHexChar(hex[i]) << 4 | fromHexChar(hex[i+1]));\n\n    return ret;\n  }\n}\n"
  },
  {
    "path": "bindings/c#/poc.csproj",
    "content": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\n  <PropertyGroup>\n    <OutputType>Exe</OutputType>\n    <TargetFramework>net8.0</TargetFramework>\n    <NoWarn>CS8981</NoWarn>\n  </PropertyGroup>\n\n</Project>\n"
  },
  {
    "path": "bindings/c#/run.me",
    "content": "#!/usr/bin/env python3\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\nimport os\nimport re\nimport sys\nimport glob\nimport subprocess\n\ntop = \"\"\"\nusing System;\nusing System.Text;\nusing System.Numerics;\nusing System.Runtime.InteropServices;\nusing size_t = System.UIntPtr;\n\n#if NET5_0_OR_GREATER\nusing System.Runtime.Loader;\nusing System.Reflection;\nusing System.IO;\n#endif\n\nnamespace supranational { public static class blst {\n\n#if NET5_0_OR_GREATER\nprivate static readonly string dll;\n\nstatic blst()\n{\n    if (String.IsNullOrEmpty(dll)) {\n        var name = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? \"blst.dll\"\n                 : RuntimeInformation.IsOSPlatform(OSPlatform.OSX)     ? \"libblst.dll.dylib\"\n                 : \"libblst.dll.so\";\n\n        var dir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);\n        var arch = RuntimeInformation.ProcessArchitecture switch {\n            Architecture.X64   => \"x64\",\n            Architecture.Arm64 => \"arm64\",\n            _ => \"unsupported\"\n        };\n\n#if NET8_0_OR_GREATER\n        // RuntimeInformation.RuntimeIdentifier changed between .NET 7 and 8\n        // and only aligns to the nuget layout in 8+\n        var rid = RuntimeInformation.RuntimeIdentifier;\n#else\n        // Mimic pre-8 RuntimeInformation.RuntimeIdentifier as\n        // \"win-x64\", \"linux-x64\", \"linux-arm64\", \"osx-x64\", etc.\n        var os = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? \"win\"\n               : RuntimeInformation.IsOSPlatform(OSPlatform.OSX)     ? \"osx\"\n               : RuntimeInformation.IsOSPlatform(OSPlatform.FreeBSD) ? 
\"freebsd\"\n               : \"linux\";\n        var rid = $\"{os}-{arch}\";\n#endif\n\n        // first look for the file in the standard locations for a nuget installed native lib\n        dll = Path.Combine(dir, \"runtimes\", rid, \"native\", name);\n\n        if (!File.Exists(dll))\n            dll = Path.Combine(dir, arch, name); // try the original non-standard location\n\n        if (!File.Exists(dll))\n            dll = Path.Combine(Environment.CurrentDirectory, name);\n\n        if (File.Exists(dll)) {\n            AssemblyLoadContext.Default.ResolvingUnmanagedDll += (asm, needs) =>\n                (needs == \"blst.dll\" ? NativeLibrary.Load(dll) : IntPtr.Zero);\n        }\n    }\n}\n#endif\n\npublic enum ERROR {\n    SUCCESS = 0,\n    BAD_ENCODING,\n    POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL,\n    PK_IS_INFINITY,\n    BAD_SCALAR,\n}\n\npublic class Exception : ApplicationException {\n    private readonly ERROR code;\n\n    public Exception(ERROR err) { code = err; }\n    public override string Message\n    {   get\n        {   switch(code) {\n            case ERROR.BAD_ENCODING:        return \"bad encoding\";\n            case ERROR.POINT_NOT_ON_CURVE:  return \"point not on curve\";\n            case ERROR.POINT_NOT_IN_GROUP:  return \"point not in group\";\n            case ERROR.AGGR_TYPE_MISMATCH:  return \"aggregate type mismatch\";\n            case ERROR.VERIFY_FAIL:         return \"verify failure\";\n            case ERROR.PK_IS_INFINITY:      return \"public key is infinity\";\n            case ERROR.BAD_SCALAR:          return \"bad scalar\";\n            default:                        return null;\n            }\n        }\n    }\n}\n\npublic enum ByteOrder {\n    BigEndian,\n    LittleEndian,\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                   [In] 
byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v3([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                      [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v4_5([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                        [In] byte[] salt, size_t salt_len,\n                                        [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v5([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                      [In] byte[] salt, size_t salt_len,\n                                      [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_derive_master_eip2333([Out] byte[] key,\n                                              [In] byte[] IKM, size_t IKM_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_derive_child_eip2333([Out] byte[] key,\n                                             [In] byte[] master, uint child_index);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_bendian([Out] byte[] ret, [In] byte[] key);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_bendian_from_scalar([Out] byte[] ret, [In] byte[] key);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_check([In] byte[] key);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_lendian([Out] byte[] key, [In] byte[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern 
void blst_lendian_from_scalar([Out] byte[] key, [In] byte[] inp);\n\npublic struct SecretKey {\n    internal byte[] key;\n\n    //public SecretKey() { key = new byte[32]; }\n    public SecretKey(byte[] IKM, string info)\n    {   key = new byte[32]; keygen(IKM, info);   }\n    public SecretKey(byte[] inp, ByteOrder order=ByteOrder.BigEndian)\n    {   key = new byte[32];\n        switch(order) {\n        case ByteOrder.BigEndian:       from_bendian(inp);  break;\n        case ByteOrder.LittleEndian:    from_lendian(inp);  break;\n        }\n    }\n\n    public void keygen(byte[] IKM, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen(key, IKM, (size_t)IKM.Length,\n                         info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v3(byte[] IKM, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v3(key, IKM, (size_t)IKM.Length,\n                            info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v4_5(byte[] IKM, string salt, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] salt_bytes = Encoding.UTF8.GetBytes(salt);\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v4_5(key, IKM, (size_t)IKM.Length,\n                              salt_bytes, (size_t)salt_bytes.Length,\n                              info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v5(byte[] IKM, byte[] salt, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v5(key, IKM, (size_t)IKM.Length,\n                            salt, (size_t)salt.Length,\n                            info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v5(byte[] IKM, string salt, string 
info=\"\")\n    {   keygen_v5(IKM, Encoding.UTF8.GetBytes(salt), info);   }\n    public void derive_master_eip2333(byte[] IKM)\n    {   if (key == null) key = new byte[32];\n        blst_derive_master_eip2333(key, IKM, (size_t)IKM.Length);\n    }\n    public SecretKey(SecretKey master, uint child_index)\n    {   key = new byte[32];\n        blst_derive_child_eip2333(key, master.key, child_index);\n    }\n\n    public void from_bendian(byte[] inp)\n    {   if (inp.Length != 32)\n            throw new Exception(ERROR.BAD_ENCODING);\n        if (key == null) key = new byte[32];\n        blst_scalar_from_bendian(key, inp);\n        if (!blst_sk_check(key))\n            throw new Exception(ERROR.BAD_ENCODING);\n    }\n    public void from_lendian(byte[] inp)\n    {   if (inp.Length != 32)\n            throw new Exception(ERROR.BAD_ENCODING);\n        if (key == null) key = new byte[32];\n        blst_scalar_from_lendian(key, inp);\n        if (!blst_sk_check(key))\n            throw new Exception(ERROR.BAD_ENCODING);\n    }\n\n    public byte[] to_bendian()\n    {   byte[] ret = new byte[32];\n        blst_bendian_from_scalar(ret, key);\n        return ret;\n    }\n    public byte[] to_lendian()\n    {   byte[] ret = new byte[32];\n        blst_lendian_from_scalar(ret, key);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_be_bytes([Out] byte[] ret, [In] byte[] inp,\n                                                               size_t inp_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_le_bytes([Out] byte[] ret, [In] byte[] inp,\n                                                               size_t inp_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_add_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] 
byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_sub_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_mul_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_sk_inverse([Out] byte[] ret, [In] byte[] a);\n\npublic struct Scalar {\n    internal byte[] val;\n\n    //public Scalar() { val = new byte[32]; }\n    public Scalar(byte[] inp, ByteOrder order=ByteOrder.BigEndian)\n    {   val = new byte[32];\n        switch(order) {\n        case ByteOrder.BigEndian:       from_bendian(inp);  break;\n        case ByteOrder.LittleEndian:    from_lendian(inp);  break;\n        }\n    }\n    private Scalar(bool _)      { val = new byte[32];               }\n    private Scalar(Scalar orig) { val = (byte[])orig.val.Clone();   }\n\n    public Scalar dup()         { return new Scalar(this);          }\n\n    public void from_bendian(byte[] inp)\n    {   if (val == null) val = new byte[32];\n        blst_scalar_from_be_bytes(val, inp, (size_t)inp.Length);\n    }\n    public void from_lendian(byte[] inp)\n    {   if (val == null) val = new byte[32];\n        blst_scalar_from_le_bytes(val, inp, (size_t)inp.Length);\n    }\n\n    public byte[] to_bendian()\n    {   byte[] ret = new byte[32];\n        blst_bendian_from_scalar(ret, val);\n        return ret;\n    }\n    public byte[] to_lendian()\n    {   byte[] ret = new byte[32];\n        blst_lendian_from_scalar(ret, val);\n        return ret;\n    }\n\n    public Scalar add(SecretKey a)\n    {   if (!blst_sk_add_n_check(val, val, a.key))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar add(Scalar a)\n    
{   if (!blst_sk_add_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar sub(Scalar a)\n    {   if (!blst_sk_sub_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar mul(Scalar a)\n    {   if (!blst_sk_mul_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar inverse()\n    {   blst_sk_inverse(val, val); return this;   }\n\n    public static Scalar operator+(Scalar a, Scalar b)\n    {   return a.dup().add(b);   }\n    public static Scalar operator-(Scalar a, Scalar b)\n    {   return a.dup().sub(b);   }\n    public static Scalar operator*(Scalar a, Scalar b)\n    {   return a.dup().mul(b);   }\n    public static Scalar operator/(Scalar a, Scalar b)\n    {   return b.dup().inverse().mul(a);   }\n}\n\nprivate const int P1_COMPRESSED_SZ = 384/8;\nprivate const int P2_COMPRESSED_SZ = 2*P1_COMPRESSED_SZ;\n\"\"\"\nmiddle = \"\"\"\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p1_affine_sizeof();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_p1_deserialize([Out] long[] ret, [In] byte[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_affine_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_affine_compress([Out] byte[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_to_affine([Out] long[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic 
extern bool blst_p1_affine_in_g1([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_p1_generator();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_core_verify_pk_in_g2([In] long[] pk, [In] long[] sig,\n                                              bool hash_or_encode,\n                                              [In] byte[] msg, size_t msg_len,\n                                              [In] byte[] dst, size_t dst_len,\n                                              [In] byte[] aug, size_t aug_len);\n\npublic struct P1_Affine {\n    internal readonly long[] point;\n\n    private static readonly int sz = (int)blst_p1_affine_sizeof()/sizeof(long);\n\n    //public P1_Affine()            { point = new long[sz]; }\n    private P1_Affine(bool _)       { point = new long[sz]; }\n    private P1_Affine(P1_Affine p)  { point = (long[])p.point.Clone(); }\n\n    public P1_Affine(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P1_COMPRESSED_SZ\n                                                      : 2*P1_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p1_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n    }\n    public P1_Affine(P1 jacobian) : this(true)\n    {   blst_p1_to_affine(point, jacobian.point);   }\n\n    public P1_Affine dup()      { return new P1_Affine(this);   }\n    public P1 to_jacobian()     { return new P1(this);          }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P1_COMPRESSED_SZ];\n        blst_p1_affine_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P1_COMPRESSED_SZ];\n        blst_p1_affine_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p1_affine_on_curve(point);    }\n    public bool in_group()      { return blst_p1_affine_in_g1(point);       }\n    public bool is_inf()        { return blst_p1_affine_is_inf(point);      }\n    public bool is_equal(P1_Affine p)\n    {   return blst_p1_affine_is_equal(point, p.point);   }\n\n    ERROR core_verify(P2_Affine pk, bool hash_or_encode,\n                      byte[] msg, string DST = \"\", byte[] aug = null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        return blst_core_verify_pk_in_g2(pk.point, point,\n                                         hash_or_encode,\n                                         msg, (size_t)msg.Length,\n                                         dst, (size_t)dst.Length,\n                                         aug, (size_t)(aug!=null ? 
aug.Length : 0));\n    }\n\n    public static P1_Affine generator()\n    {   var ret = new P1_Affine(true);\n        Marshal.Copy(blst_p1_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p1_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_compress([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_from_affine([Out] long[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_in_g1([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_sk_to_pk_in_g1([Out] long[] ret, [In] byte[] SK);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_encode_to_g1([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                         [In] byte[] dst, size_t dst_len,\n                                         [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_hash_to_g1([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                       [In] byte[] dst, size_t dst_len,\n            
                           [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_sign_pk_in_g2([Out] long[] ret, [In] long[] hash, [In] byte[] SK);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_mult([Out] long[] ret, [In] long[] a,\n                                    [In] byte[] scalar, size_t nbits);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_cneg([Out] long[] ret, bool cbit);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_add_or_double([Out] long[] ret, [In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_add_or_double_affine([Out] long[] ret, [In] long[] a,\n                                                    [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_double([Out] long[] ret, [In] long[] a);\n\npublic struct P1 {\n    internal long[] point;\n\n    private static readonly int sz = (int)blst_p1_sizeof()/sizeof(long);\n\n    //public P1()           { point = new long[sz]; }\n    private P1(bool _)      { point = new long[sz]; }\n    private P1(P1 p)        { point = (long[])p.point.Clone(); }\n    private long[] self()\n    {   if (point==null) { point = new long[sz]; } return point;   }\n\n    public P1(SecretKey sk) : this(true)\n    {   blst_sk_to_pk_in_g1(point, sk.key);   }\n    public P1(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P1_COMPRESSED_SZ\n                                                      : 2*P1_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p1_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n        blst_p1_from_affine(point, point);\n    }\n    public P1(P1_Affine affine) : this(true)\n    {   blst_p1_from_affine(point, affine.point);   }\n\n    public P1 dup()                 { return new P1(this);                  }\n    public P1_Affine to_affine()    { return new P1_Affine(this);           }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P1_COMPRESSED_SZ];\n        blst_p1_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P1_COMPRESSED_SZ];\n        blst_p1_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p1_on_curve(point);           }\n    public bool in_group()      { return blst_p1_in_g1(point);              }\n    public bool is_inf()        { return blst_p1_is_inf(point);             }\n    public bool is_equal(P1 p)  { return blst_p1_is_equal(point, p.point);  }\n\n    public P1 hash_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_hash_to_g1(self(), msg, (size_t)msg.Length,\n                                dst, (size_t)dst.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n        return this;\n    }\n    public P1 encode_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_encode_to_g1(self(), msg, (size_t)msg.Length,\n                                  dst, (size_t)dst.Length,\n                                  aug, (size_t)(aug!=null ? 
aug.Length : 0));\n        return this;\n    }\n\n    public P1 sign_with(SecretKey sk)\n    {   blst_sign_pk_in_g2(point, point, sk.key); return this;   }\n    public P1 sign_with(Scalar scalar)\n    {   blst_sign_pk_in_g2(point, point, scalar.val); return this;   }\n\n    public void aggregate(P1_Affine inp)\n    {   if (blst_p1_affine_in_g1(inp.point))\n            blst_p1_add_or_double_affine(point, point, inp.point);\n        else\n            throw new Exception(ERROR.POINT_NOT_IN_GROUP);\n    }\n\n    public P1 mult(byte[] scalar)\n    {   blst_p1_mult(point, point, scalar, (size_t)(scalar.Length*8));\n        return this;\n    }\n    public P1 mult(Scalar scalar)\n    {   blst_p1_mult(point, point, scalar.val, (size_t)255);\n        return this;\n    }\n    public P1 mult(BigInteger scalar)\n    {   byte[] val;\n        if (scalar.Sign < 0) {\n            val = BigInteger.Negate(scalar).ToByteArray();\n            blst_p1_cneg(point, true);\n        } else {\n            val = scalar.ToByteArray();\n        }\n        int len = val.Length;\n        if (val[len-1]==0) len--;\n        blst_p1_mult(point, point, val, (size_t)(len*8));\n        return this;\n    }\n    public P1 cneg(bool flag)   { blst_p1_cneg(point, flag); return this;   }\n    public P1 neg()             { blst_p1_cneg(point, true); return this;   }\n    public P1 add(P1 a)\n    {   blst_p1_add_or_double(point, point, a.point); return this;          }\n    public P1 add(P1_Affine a)\n    {   blst_p1_add_or_double_affine(point, point, a.point); return this;   }\n    public P1 dbl()\n    {   blst_p1_double(point, point); return this;                          }\n\n    public static P1 generator()\n    {   var ret = new P1(true);\n        Marshal.Copy(blst_p1_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\npublic static P1 G1() { return P1.generator(); }\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void 
blst_aggregated_in_g1([Out] long[] fp12, [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_aggregate_pk_in_g1([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_mul_n_aggregate_pk_in_g1([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] scalar, size_t nbits,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n\"\"\"\nbottom = \"\"\"\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_fp12_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_miller_loop([Out] long[] fp12, [In] long[] q,\n                                                       [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_is_one([In] long[] fp12);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_is_equal([In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_fp12_sqr([Out] long[] ret, [In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_fp12_mul([Out] long[] ret, [In] long[] a,\n                                                   [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_final_exp([Out] long[] ret, [In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = 
CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_finalverify([In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_fp12_one();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_in_group([In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_bendian_from_fp12([Out] byte[] ret, [In] long[] a);\n\npublic struct PT {\n    internal readonly long[] fp12;\n\n    private static readonly int sz = (int)blst_fp12_sizeof()/sizeof(long);\n\n    internal PT(bool _)     { fp12 = new long[sz]; }\n    private PT(PT orig)     { fp12 = (long[])orig.fp12.Clone(); }\n\n    public PT(P1_Affine p) : this(true)\n    {   blst_aggregated_in_g1(fp12, p.point);   }\n    public PT(P1 p) : this(true)\n    {   blst_aggregated_in_g1(fp12, (new P1_Affine(p)).point);   }\n    public PT(P2_Affine q) : this(true)\n    {   blst_aggregated_in_g2(fp12, q.point);   }\n    public PT(P2 q) : this(true)\n    {   blst_aggregated_in_g2(fp12, (new P2_Affine(q)).point);   }\n    public PT(P2_Affine q, P1_Affine p) : this(true)\n    {   blst_miller_loop(fp12, q.point, p.point);   }\n    public PT(P1_Affine p, P2_Affine q) : this(q, p) {}\n    public PT(P2 q, P1 p) : this(true)\n    {   blst_miller_loop(fp12, (new P2_Affine(q)).point,\n                               (new P1_Affine(p)).point);\n    }\n    public PT(P1 p, P2 q) : this(q, p) {}\n\n    public PT dup()         { return new PT(this); }\n    public bool is_one()    { return blst_fp12_is_one(fp12); }\n    public bool is_equal(PT p)\n    {   return blst_fp12_is_equal(fp12, p.fp12);   }\n    public PT sqr()         { blst_fp12_sqr(fp12, fp12);         return this; }\n    public PT mul(PT p)     { blst_fp12_mul(fp12, fp12, p.fp12); return this; }\n    public PT final_exp()   { blst_final_exp(fp12, fp12);        return this; }\n    public bool in_group()  { return 
blst_fp12_in_group(fp12); }\n    public byte[] to_bendian()\n    {   byte[] ret = new byte[12*P1_COMPRESSED_SZ];\n        blst_bendian_from_fp12(ret, fp12);\n        return ret;\n    }\n\n    public static bool finalverify(PT gt1, PT gt2)\n    {   return blst_fp12_finalverify(gt1.fp12, gt2.fp12);   }\n\n    public static PT one()\n    {   var ret = new PT(true);\n        Marshal.Copy(blst_fp12_one(), ret.fp12, 0, ret.fp12.Length);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_pairing_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_pairing_init([In, Out] long[] ctx, bool hash_or_encode,\n                                             [In] ref long dst, size_t dst_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_pairing_commit([In, Out] long[] ctx);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_merge([In, Out] long[] ctx, [In] long[] ctx1);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_pairing_finalverify([In] long[] ctx, [In] long[] sig);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_pairing_raw_aggregate([In, Out] long[] ctx, [In] long[] q,\n                                                      [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_pairing_as_fp12([In] long[] ctx);\n\npublic struct Pairing {\n    private readonly long[] ctx;\n\n    private static readonly int sz = (int)blst_pairing_sizeof()/sizeof(long);\n\n    public Pairing(bool hash_or_encode=false, string DST=\"\")\n    {\n        byte[] dst = Encoding.UTF8.GetBytes(DST);\n        int dst_len = dst.Length;\n        int add_len = dst_len!=0 ? 
(dst_len+sizeof(long)-1)/sizeof(long) : 1;\n        Array.Resize(ref dst, add_len*sizeof(long));\n\n        ctx = new long[sz+add_len];\n\n        for (int i=0; i<add_len; i++)\n            ctx[sz+i] = BitConverter.ToInt64(dst, i*sizeof(long));\n\n        GCHandle h = GCHandle.Alloc(ctx, GCHandleType.Pinned);\n        blst_pairing_init(ctx, hash_or_encode, ref ctx[sz], (size_t)dst_len);\n        h.Free();\n    }\n\n    public ERROR aggregate(P1_Affine pk, Nullable<P2_Affine> sig,\n                                         byte[] msg, byte[] aug=null)\n    {   return blst_pairing_aggregate_pk_in_g1(ctx, pk.point,\n                                sig.HasValue ? sig.Value.point : null,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n    public ERROR aggregate(P2_Affine pk, Nullable<P1_Affine> sig,\n                                         byte[] msg, byte[] aug=null)\n    {   return blst_pairing_aggregate_pk_in_g2(ctx, pk.point,\n                                sig.HasValue ? sig.Value.point : null,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n    public ERROR mul_n_aggregate(P2_Affine pk, P1_Affine sig,\n                                               byte[] scalar, int nbits,\n                                               byte[] msg, byte[] aug=null)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g2(ctx, pk.point, sig.point,\n                                scalar, (size_t)nbits,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? 
aug.Length : 0));\n    }\n    public ERROR mul_n_aggregate(P1_Affine pk, P2_Affine sig,\n                                               byte[] scalar, int nbits,\n                                               byte[] msg, byte[] aug=null)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g1(ctx, pk.point, sig.point,\n                                scalar, (size_t)nbits,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n\n    public void commit()    { blst_pairing_commit(ctx); }\n    public void merge(Pairing a)\n    {   var err = blst_pairing_merge(ctx, a.ctx);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n    }\n    public bool finalverify(PT sig=new PT())\n    {   return blst_pairing_finalverify(ctx, sig.fp12);   }\n\n    public void raw_aggregate(P2_Affine q, P1_Affine p)\n    {   blst_pairing_raw_aggregate(ctx, q.point, p.point);   }\n    public void raw_aggregate(P1_Affine p, P2_Affine q)\n    {   raw_aggregate(q, p);   }\n    public void raw_aggregate(P2 q, P1 p)\n    {   blst_pairing_raw_aggregate(ctx, (new P2_Affine(q)).point,\n                                        (new P1_Affine(p)).point);\n    }\n    public void raw_aggregate(P1 p, P2 q)\n    {   raw_aggregate(q, p);   }\n    public PT as_fp12()\n    {   var ret = new PT(true);\n        GCHandle h = GCHandle.Alloc(ctx, GCHandleType.Pinned);\n        Marshal.Copy(blst_pairing_as_fp12(ctx), ret.fp12, 0, ret.fp12.Length);\n        h.Free();\n        return ret;\n    }\n}\n}}\"\"\"\n\nhere = re.split(r'[/\\\\](?=[^/\\\\]*$)', sys.argv[0])\nif len(here) > 1:\n    os.chdir(here[0])\n\n\ndef xchg_1vs2(matchobj):\n    if matchobj.group(2) == '1':\n        return matchobj.group(1) + '2'\n    else:\n        return matchobj.group(1) + '1'\n\n\ndef newer(files):\n    if len(files) == 1:\n        return True\n    rh = files[-1]\n    if not os.path.exists(rh):\n        return True\n    for lh 
in files[:-1]:\n        if os.stat(lh).st_ctime > os.stat(rh).st_ctime:\n            return True\n    return False\n\n\nfname = \"supranational.blst.cs\"\nif newer([here[-1], fname]):\n    fd = open(fname, \"w\")\n    print(\"//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fd)\n    print(\"// DO NOT EDIT THIS FILE!!!\",                         file=fd)\n    print(\"// The file is auto-generated by \" + here[-1],        file=fd)\n    print(\"//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fd)\n    print(\"\\n\\n\", file=fd)\n    print(top,    file=fd)\n    print(middle, file=fd)\n    print(re.sub(r'((?<!f)[pgPG])([12])', xchg_1vs2, middle), file=fd)\n    print(bottom, file=fd)\n    fd.close()\n\ntry:  # mono-devel on Linux [\"-language:5\" corresponds to latest ECMA/ISO]\n    subprocess.check_call([\"mcs\", \"-langversion:5\", \"-optimize+\",\n                                  \"poc.cs\", fname, \"-r:System.Numerics.dll\"])\n    if newer([\"../blst.h\"] + glob.glob(\"libblst.dll.*\")):\n        print(\"building libblst.dll...\") or sys.stdout.flush()\n        subprocess.check_call([\"../../build.sh\", \"-dll\"] + sys.argv[1:])\n    subprocess.check_call([\"mono\", \"poc.exe\"])\n    sys.exit(0)\nexcept OSError as e:\n    if e.errno != 2:    # not \"no such file or directory\"\n        raise e\n\ntry:  # Visual Studio Developer Command Prompt\n    subprocess.check_call([\"csc\", \"-langversion:5\", \"-optimize+\",\n                                  \"poc.cs\", fname, \"-r:System.Numerics.dll\"])\n    if newer([os.path.normpath(\"../blst.h\"), \"blst.dll\"]):\n        print(\"building blst.dll...\") or sys.stdout.flush()\n        subprocess.check_call([os.path.normpath(\"../../build.bat\"), \"-shared\"]\n                              + sys.argv[1:])\n    subprocess.check_call(os.path.normpath(\"./poc.exe\"))\n    sys.exit(0)\nexcept OSError as e:\n    if e.errno != 2:    # not \"no such file or directory\"\n        raise e\n\n# env = 
os.environ.copy()\n# env[\"PATH\"] = os.getcwd() + os.path.pathsep + env[\"PATH\"]\n# env[\"DYLD_FALLBACK_LIBRARY_PATH\"] = os.getcwd()\n# env[\"LD_LIBRARY_PATH\"] = os.getcwd()\n# subprocess.check_call([\"dotnet\", \"run\"], env=env)\n"
  },
  {
    "path": "bindings/c#/supranational.blst.cs",
    "content": "//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n// DO NOT EDIT THIS FILE!!!\n// The file is auto-generated by run.me\n//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n\n\n\nusing System;\nusing System.Text;\nusing System.Numerics;\nusing System.Runtime.InteropServices;\nusing size_t = System.UIntPtr;\n\n#if NET5_0_OR_GREATER\nusing System.Runtime.Loader;\nusing System.Reflection;\nusing System.IO;\n#endif\n\nnamespace supranational { public static class blst {\n\n#if NET5_0_OR_GREATER\nprivate static readonly string dll;\n\nstatic blst()\n{\n    if (String.IsNullOrEmpty(dll)) {\n        var name = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? \"blst.dll\"\n                 : RuntimeInformation.IsOSPlatform(OSPlatform.OSX)     ? \"libblst.dll.dylib\"\n                 : \"libblst.dll.so\";\n\n        var dir = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);\n        var arch = RuntimeInformation.ProcessArchitecture switch {\n            Architecture.X64   => \"x64\",\n            Architecture.Arm64 => \"arm64\",\n            _ => \"unsupported\"\n        };\n\n#if NET8_0_OR_GREATER\n        // RuntimeInformation.RuntimeIdentifier changed between .NET 7 and 8\n        // and only aligns to the nuget layout in 8+\n        var rid = RuntimeInformation.RuntimeIdentifier;\n#else\n        // Mimic pre-8 RuntimeInformation.RuntimeIdentifier as\n        // \"win-x64\", \"linux-x64\", \"linux-arm64\", \"osx-x64\", etc.\n        var os = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? \"win\"\n               : RuntimeInformation.IsOSPlatform(OSPlatform.OSX)     ? \"osx\"\n               : RuntimeInformation.IsOSPlatform(OSPlatform.FreeBSD) ? 
\"freebsd\"\n               : \"linux\";\n        var rid = $\"{os}-{arch}\";\n#endif\n\n        // first look for the file in the standard locations for a nuget installed native lib\n        dll = Path.Combine(dir, \"runtimes\", rid, \"native\", name);\n\n        if (!File.Exists(dll))\n            dll = Path.Combine(dir, arch, name); // try the original non-standard location\n\n        if (!File.Exists(dll))\n            dll = Path.Combine(Environment.CurrentDirectory, name);\n\n        if (File.Exists(dll)) {\n            AssemblyLoadContext.Default.ResolvingUnmanagedDll += (asm, needs) =>\n                (needs == \"blst.dll\" ? NativeLibrary.Load(dll) : IntPtr.Zero);\n        }\n    }\n}\n#endif\n\npublic enum ERROR {\n    SUCCESS = 0,\n    BAD_ENCODING,\n    POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL,\n    PK_IS_INFINITY,\n    BAD_SCALAR,\n}\n\npublic class Exception : ApplicationException {\n    private readonly ERROR code;\n\n    public Exception(ERROR err) { code = err; }\n    public override string Message\n    {   get\n        {   switch(code) {\n            case ERROR.BAD_ENCODING:        return \"bad encoding\";\n            case ERROR.POINT_NOT_ON_CURVE:  return \"point not on curve\";\n            case ERROR.POINT_NOT_IN_GROUP:  return \"point not in group\";\n            case ERROR.AGGR_TYPE_MISMATCH:  return \"aggregate type mismatch\";\n            case ERROR.VERIFY_FAIL:         return \"verify failure\";\n            case ERROR.PK_IS_INFINITY:      return \"public key is infinity\";\n            case ERROR.BAD_SCALAR:          return \"bad scalar\";\n            default:                        return null;\n            }\n        }\n    }\n}\n\npublic enum ByteOrder {\n    BigEndian,\n    LittleEndian,\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                   [In] 
byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v3([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                      [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v4_5([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                        [In] byte[] salt, size_t salt_len,\n                                        [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_keygen_v5([Out] byte[] key, [In] byte[] IKM, size_t IKM_len,\n                                      [In] byte[] salt, size_t salt_len,\n                                      [In] byte[] info, size_t info_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_derive_master_eip2333([Out] byte[] key,\n                                              [In] byte[] IKM, size_t IKM_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_derive_child_eip2333([Out] byte[] key,\n                                             [In] byte[] master, uint child_index);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_bendian([Out] byte[] ret, [In] byte[] key);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_bendian_from_scalar([Out] byte[] ret, [In] byte[] key);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_check([In] byte[] key);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_lendian([Out] byte[] key, [In] byte[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern 
void blst_lendian_from_scalar([Out] byte[] key, [In] byte[] inp);\n\npublic struct SecretKey {\n    internal byte[] key;\n\n    //public SecretKey() { key = new byte[32]; }\n    public SecretKey(byte[] IKM, string info)\n    {   key = new byte[32]; keygen(IKM, info);   }\n    public SecretKey(byte[] inp, ByteOrder order=ByteOrder.BigEndian)\n    {   key = new byte[32];\n        switch(order) {\n        case ByteOrder.BigEndian:       from_bendian(inp);  break;\n        case ByteOrder.LittleEndian:    from_lendian(inp);  break;\n        }\n    }\n\n    public void keygen(byte[] IKM, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen(key, IKM, (size_t)IKM.Length,\n                         info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v3(byte[] IKM, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v3(key, IKM, (size_t)IKM.Length,\n                            info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v4_5(byte[] IKM, string salt, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] salt_bytes = Encoding.UTF8.GetBytes(salt);\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v4_5(key, IKM, (size_t)IKM.Length,\n                              salt_bytes, (size_t)salt_bytes.Length,\n                              info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v5(byte[] IKM, byte[] salt, string info=\"\")\n    {   if (key == null) key = new byte[32];\n        byte[] info_bytes = Encoding.UTF8.GetBytes(info);\n        blst_keygen_v5(key, IKM, (size_t)IKM.Length,\n                            salt, (size_t)salt.Length,\n                            info_bytes, (size_t)info_bytes.Length);\n    }\n    public void keygen_v5(byte[] IKM, string salt, string 
info=\"\")\n    {   keygen_v5(IKM, Encoding.UTF8.GetBytes(salt), info);   }\n    public void derive_master_eip2333(byte[] IKM)\n    {   if (key == null) key = new byte[32];\n        blst_derive_master_eip2333(key, IKM, (size_t)IKM.Length);\n    }\n    public SecretKey(SecretKey master, uint child_index)\n    {   key = new byte[32];\n        blst_derive_child_eip2333(key, master.key, child_index);\n    }\n\n    public void from_bendian(byte[] inp)\n    {   if (inp.Length != 32)\n            throw new Exception(ERROR.BAD_ENCODING);\n        if (key == null) key = new byte[32];\n        blst_scalar_from_bendian(key, inp);\n        if (!blst_sk_check(key))\n            throw new Exception(ERROR.BAD_ENCODING);\n    }\n    public void from_lendian(byte[] inp)\n    {   if (inp.Length != 32)\n            throw new Exception(ERROR.BAD_ENCODING);\n        if (key == null) key = new byte[32];\n        blst_scalar_from_lendian(key, inp);\n        if (!blst_sk_check(key))\n            throw new Exception(ERROR.BAD_ENCODING);\n    }\n\n    public byte[] to_bendian()\n    {   byte[] ret = new byte[32];\n        blst_bendian_from_scalar(ret, key);\n        return ret;\n    }\n    public byte[] to_lendian()\n    {   byte[] ret = new byte[32];\n        blst_lendian_from_scalar(ret, key);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_be_bytes([Out] byte[] ret, [In] byte[] inp,\n                                                               size_t inp_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_scalar_from_le_bytes([Out] byte[] ret, [In] byte[] inp,\n                                                               size_t inp_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_add_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] 
byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_sub_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_sk_mul_n_check([Out] byte[] ret, [In] byte[] a,\n                                                         [In] byte[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_sk_inverse([Out] byte[] ret, [In] byte[] a);\n\npublic struct Scalar {\n    internal byte[] val;\n\n    //public Scalar() { val = new byte[32]; }\n    public Scalar(byte[] inp, ByteOrder order=ByteOrder.BigEndian)\n    {   val = new byte[32];\n        switch(order) {\n        case ByteOrder.BigEndian:       from_bendian(inp);  break;\n        case ByteOrder.LittleEndian:    from_lendian(inp);  break;\n        }\n    }\n    private Scalar(bool _)      { val = new byte[32];               }\n    private Scalar(Scalar orig) { val = (byte[])orig.val.Clone();   }\n\n    public Scalar dup()         { return new Scalar(this);          }\n\n    public void from_bendian(byte[] inp)\n    {   if (val == null) val = new byte[32];\n        blst_scalar_from_be_bytes(val, inp, (size_t)inp.Length);\n    }\n    public void from_lendian(byte[] inp)\n    {   if (val == null) val = new byte[32];\n        blst_scalar_from_le_bytes(val, inp, (size_t)inp.Length);\n    }\n\n    public byte[] to_bendian()\n    {   byte[] ret = new byte[32];\n        blst_bendian_from_scalar(ret, val);\n        return ret;\n    }\n    public byte[] to_lendian()\n    {   byte[] ret = new byte[32];\n        blst_lendian_from_scalar(ret, val);\n        return ret;\n    }\n\n    public Scalar add(SecretKey a)\n    {   if (!blst_sk_add_n_check(val, val, a.key))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar add(Scalar a)\n    
{   if (!blst_sk_add_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar sub(Scalar a)\n    {   if (!blst_sk_sub_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar mul(Scalar a)\n    {   if (!blst_sk_mul_n_check(val, val, a.val))\n            throw new Exception(ERROR.BAD_SCALAR);\n        return this;\n    }\n    public Scalar inverse()\n    {   blst_sk_inverse(val, val); return this;   }\n\n    public static Scalar operator+(Scalar a, Scalar b)\n    {   return a.dup().add(b);   }\n    public static Scalar operator-(Scalar a, Scalar b)\n    {   return a.dup().sub(b);   }\n    public static Scalar operator*(Scalar a, Scalar b)\n    {   return a.dup().mul(b);   }\n    public static Scalar operator/(Scalar a, Scalar b)\n    {   return b.dup().inverse().mul(a);   }\n}\n\nprivate const int P1_COMPRESSED_SZ = 384/8;\nprivate const int P2_COMPRESSED_SZ = 2*P1_COMPRESSED_SZ;\n\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p1_affine_sizeof();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_p1_deserialize([Out] long[] ret, [In] byte[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_affine_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_affine_compress([Out] byte[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_to_affine([Out] long[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool 
blst_p1_affine_in_g1([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_affine_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_p1_generator();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_core_verify_pk_in_g2([In] long[] pk, [In] long[] sig,\n                                              bool hash_or_encode,\n                                              [In] byte[] msg, size_t msg_len,\n                                              [In] byte[] dst, size_t dst_len,\n                                              [In] byte[] aug, size_t aug_len);\n\npublic struct P1_Affine {\n    internal readonly long[] point;\n\n    private static readonly int sz = (int)blst_p1_affine_sizeof()/sizeof(long);\n\n    //public P1_Affine()            { point = new long[sz]; }\n    private P1_Affine(bool _)       { point = new long[sz]; }\n    private P1_Affine(P1_Affine p)  { point = (long[])p.point.Clone(); }\n\n    public P1_Affine(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P1_COMPRESSED_SZ\n                                                      : 2*P1_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p1_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n    }\n    public P1_Affine(P1 jacobian) : this(true)\n    {   blst_p1_to_affine(point, jacobian.point);   }\n\n    public P1_Affine dup()      { return new P1_Affine(this);   }\n    public P1 to_jacobian()     { return new P1(this);          }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P1_COMPRESSED_SZ];\n        blst_p1_affine_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P1_COMPRESSED_SZ];\n        blst_p1_affine_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p1_affine_on_curve(point);    }\n    public bool in_group()      { return blst_p1_affine_in_g1(point);       }\n    public bool is_inf()        { return blst_p1_affine_is_inf(point);      }\n    public bool is_equal(P1_Affine p)\n    {   return blst_p1_affine_is_equal(point, p.point);   }\n\n    ERROR core_verify(P2_Affine pk, bool hash_or_encode,\n                      byte[] msg, string DST = \"\", byte[] aug = null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        return blst_core_verify_pk_in_g2(pk.point, point,\n                                         hash_or_encode,\n                                         msg, (size_t)msg.Length,\n                                         dst, (size_t)dst.Length,\n                                         aug, (size_t)(aug!=null ? 
aug.Length : 0));\n    }\n\n    public static P1_Affine generator()\n    {   var ret = new P1_Affine(true);\n        Marshal.Copy(blst_p1_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p1_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_compress([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_from_affine([Out] long[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_in_g1([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p1_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_sk_to_pk_in_g1([Out] long[] ret, [In] byte[] SK);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_encode_to_g1([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                         [In] byte[] dst, size_t dst_len,\n                                         [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_hash_to_g1([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                       [In] byte[] dst, size_t dst_len,\n            
                           [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_sign_pk_in_g2([Out] long[] ret, [In] long[] hash, [In] byte[] SK);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_mult([Out] long[] ret, [In] long[] a,\n                                    [In] byte[] scalar, size_t nbits);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_cneg([Out] long[] ret, bool cbit);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_add_or_double([Out] long[] ret, [In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p1_add_or_double_affine([Out] long[] ret, [In] long[] a,\n                                                    [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p1_double([Out] long[] ret, [In] long[] a);\n\npublic struct P1 {\n    internal long[] point;\n\n    private static readonly int sz = (int)blst_p1_sizeof()/sizeof(long);\n\n    //public P1()           { point = new long[sz]; }\n    private P1(bool _)      { point = new long[sz]; }\n    private P1(P1 p)        { point = (long[])p.point.Clone(); }\n    private long[] self()\n    {   if (point==null) { point = new long[sz]; } return point;   }\n\n    public P1(SecretKey sk) : this(true)\n    {   blst_sk_to_pk_in_g1(point, sk.key);   }\n    public P1(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P1_COMPRESSED_SZ\n                                                      : 2*P1_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p1_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n        blst_p1_from_affine(point, point);\n    }\n    public P1(P1_Affine affine) : this(true)\n    {   blst_p1_from_affine(point, affine.point);   }\n\n    public P1 dup()                 { return new P1(this);                  }\n    public P1_Affine to_affine()    { return new P1_Affine(this);           }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P1_COMPRESSED_SZ];\n        blst_p1_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P1_COMPRESSED_SZ];\n        blst_p1_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p1_on_curve(point);           }\n    public bool in_group()      { return blst_p1_in_g1(point);              }\n    public bool is_inf()        { return blst_p1_is_inf(point);             }\n    public bool is_equal(P1 p)  { return blst_p1_is_equal(point, p.point);  }\n\n    public P1 hash_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_hash_to_g1(self(), msg, (size_t)msg.Length,\n                                dst, (size_t)dst.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n        return this;\n    }\n    public P1 encode_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_encode_to_g1(self(), msg, (size_t)msg.Length,\n                                  dst, (size_t)dst.Length,\n                                  aug, (size_t)(aug!=null ? 
aug.Length : 0));\n        return this;\n    }\n\n    public P1 sign_with(SecretKey sk)\n    {   blst_sign_pk_in_g2(point, point, sk.key); return this;   }\n    public P1 sign_with(Scalar scalar)\n    {   blst_sign_pk_in_g2(point, point, scalar.val); return this;   }\n\n    public void aggregate(P1_Affine inp)\n    {   if (blst_p1_affine_in_g1(inp.point))\n            blst_p1_add_or_double_affine(point, point, inp.point);\n        else\n            throw new Exception(ERROR.POINT_NOT_IN_GROUP);\n    }\n\n    public P1 mult(byte[] scalar)\n    {   blst_p1_mult(point, point, scalar, (size_t)(scalar.Length*8));\n        return this;\n    }\n    public P1 mult(Scalar scalar)\n    {   blst_p1_mult(point, point, scalar.val, (size_t)255);\n        return this;\n    }\n    public P1 mult(BigInteger scalar)\n    {   byte[] val;\n        if (scalar.Sign < 0) {\n            val = BigInteger.Negate(scalar).ToByteArray();\n            blst_p1_cneg(point, true);\n        } else {\n            val = scalar.ToByteArray();\n        }\n        int len = val.Length;\n        if (val[len-1]==0) len--;\n        blst_p1_mult(point, point, val, (size_t)(len*8));\n        return this;\n    }\n    public P1 cneg(bool flag)   { blst_p1_cneg(point, flag); return this;   }\n    public P1 neg()             { blst_p1_cneg(point, true); return this;   }\n    public P1 add(P1 a)\n    {   blst_p1_add_or_double(point, point, a.point); return this;          }\n    public P1 add(P1_Affine a)\n    {   blst_p1_add_or_double_affine(point, point, a.point); return this;   }\n    public P1 dbl()\n    {   blst_p1_double(point, point); return this;                          }\n\n    public static P1 generator()\n    {   var ret = new P1(true);\n        Marshal.Copy(blst_p1_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\npublic static P1 G1() { return P1.generator(); }\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void 
blst_aggregated_in_g1([Out] long[] fp12, [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_aggregate_pk_in_g1([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_mul_n_aggregate_pk_in_g1([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] scalar, size_t nbits,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p2_affine_sizeof();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_p2_deserialize([Out] long[] ret, [In] byte[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_affine_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_affine_compress([Out] byte[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_to_affine([Out] long[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_affine_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_affine_in_g2([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_affine_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = 
CallingConvention.Cdecl)]\nstatic extern bool blst_p2_affine_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_p2_generator();\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_core_verify_pk_in_g1([In] long[] pk, [In] long[] sig,\n                                              bool hash_or_encode,\n                                              [In] byte[] msg, size_t msg_len,\n                                              [In] byte[] dst, size_t dst_len,\n                                              [In] byte[] aug, size_t aug_len);\n\npublic struct P2_Affine {\n    internal readonly long[] point;\n\n    private static readonly int sz = (int)blst_p2_affine_sizeof()/sizeof(long);\n\n    //public P2_Affine()            { point = new long[sz]; }\n    private P2_Affine(bool _)       { point = new long[sz]; }\n    private P2_Affine(P2_Affine p)  { point = (long[])p.point.Clone(); }\n\n    public P2_Affine(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P2_COMPRESSED_SZ\n                                                      : 2*P2_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p2_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n    }\n    public P2_Affine(P2 jacobian) : this(true)\n    {   blst_p2_to_affine(point, jacobian.point);   }\n\n    public P2_Affine dup()      { return new P2_Affine(this);   }\n    public P2 to_jacobian()     { return new P2(this);          }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P2_COMPRESSED_SZ];\n        blst_p2_affine_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P2_COMPRESSED_SZ];\n        blst_p2_affine_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p2_affine_on_curve(point);    }\n    public bool in_group()      { return blst_p2_affine_in_g2(point);       }\n    public bool is_inf()        { return blst_p2_affine_is_inf(point);      }\n    public bool is_equal(P2_Affine p)\n    {   return blst_p2_affine_is_equal(point, p.point);   }\n\n    ERROR core_verify(P1_Affine pk, bool hash_or_encode,\n                      byte[] msg, string DST = \"\", byte[] aug = null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        return blst_core_verify_pk_in_g1(pk.point, point,\n                                         hash_or_encode,\n                                         msg, (size_t)msg.Length,\n                                         dst, (size_t)dst.Length,\n                                         aug, (size_t)(aug!=null ? 
aug.Length : 0));\n    }\n\n    public static P2_Affine generator()\n    {   var ret = new P2_Affine(true);\n        Marshal.Copy(blst_p2_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_p2_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_serialize([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_compress([Out] byte[] ret, [In] long[] inp);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_from_affine([Out] long[] ret, [In] long[] inp);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_on_curve([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_in_g2([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_is_inf([In] long[] point);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_p2_is_equal([In] long[] a, [In] long[] b);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_sk_to_pk_in_g2([Out] long[] ret, [In] byte[] SK);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_encode_to_g2([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                         [In] byte[] dst, size_t dst_len,\n                                         [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_hash_to_g2([Out] long[] ret, [In] byte[] msg, size_t msg_len,\n                                       [In] byte[] dst, size_t dst_len,\n            
                           [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_sign_pk_in_g1([Out] long[] ret, [In] long[] hash, [In] byte[] SK);\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p2_mult([Out] long[] ret, [In] long[] a,\n                                    [In] byte[] scalar, size_t nbits);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_cneg([Out] long[] ret, bool cbit);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p2_add_or_double([Out] long[] ret, [In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_p2_add_or_double_affine([Out] long[] ret, [In] long[] a,\n                                                    [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_p2_double([Out] long[] ret, [In] long[] a);\n\npublic struct P2 {\n    internal long[] point;\n\n    private static readonly int sz = (int)blst_p2_sizeof()/sizeof(long);\n\n    //public P2()           { point = new long[sz]; }\n    private P2(bool _)      { point = new long[sz]; }\n    private P2(P2 p)        { point = (long[])p.point.Clone(); }\n    private long[] self()\n    {   if (point==null) { point = new long[sz]; } return point;   }\n\n    public P2(SecretKey sk) : this(true)\n    {   blst_sk_to_pk_in_g2(point, sk.key);   }\n    public P2(byte[] inp) : this(true)\n    {   int len = inp.Length;\n        if (len == 0 || len != ((inp[0]&0x80) == 0x80 ? 
P2_COMPRESSED_SZ\n                                                      : 2*P2_COMPRESSED_SZ))\n            throw new Exception(ERROR.BAD_ENCODING);\n        ERROR err = blst_p2_deserialize(point, inp);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n        blst_p2_from_affine(point, point);\n    }\n    public P2(P2_Affine affine) : this(true)\n    {   blst_p2_from_affine(point, affine.point);   }\n\n    public P2 dup()                 { return new P2(this);                  }\n    public P2_Affine to_affine()    { return new P2_Affine(this);           }\n    public byte[] serialize()\n    {   byte[] ret = new byte[2*P2_COMPRESSED_SZ];\n        blst_p2_serialize(ret, point);\n        return ret;\n    }\n    public byte[] compress()\n    {   byte[] ret = new byte[P2_COMPRESSED_SZ];\n        blst_p2_compress(ret, point);\n        return ret;\n    }\n\n    public bool on_curve()      { return blst_p2_on_curve(point);           }\n    public bool in_group()      { return blst_p2_in_g2(point);              }\n    public bool is_inf()        { return blst_p2_is_inf(point);             }\n    public bool is_equal(P2 p)  { return blst_p2_is_equal(point, p.point);  }\n\n    public P2 hash_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_hash_to_g2(self(), msg, (size_t)msg.Length,\n                                dst, (size_t)dst.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n        return this;\n    }\n    public P2 encode_to(byte[] msg, string DST=\"\", byte[] aug=null)\n    {   byte[] dst = Encoding.UTF8.GetBytes(DST);\n        blst_encode_to_g2(self(), msg, (size_t)msg.Length,\n                                  dst, (size_t)dst.Length,\n                                  aug, (size_t)(aug!=null ? 
aug.Length : 0));\n        return this;\n    }\n\n    public P2 sign_with(SecretKey sk)\n    {   blst_sign_pk_in_g1(point, point, sk.key); return this;   }\n    public P2 sign_with(Scalar scalar)\n    {   blst_sign_pk_in_g1(point, point, scalar.val); return this;   }\n\n    public void aggregate(P2_Affine inp)\n    {   if (blst_p2_affine_in_g2(inp.point))\n            blst_p2_add_or_double_affine(point, point, inp.point);\n        else\n            throw new Exception(ERROR.POINT_NOT_IN_GROUP);\n    }\n\n    public P2 mult(byte[] scalar)\n    {   blst_p2_mult(point, point, scalar, (size_t)(scalar.Length*8));\n        return this;\n    }\n    public P2 mult(Scalar scalar)\n    {   blst_p2_mult(point, point, scalar.val, (size_t)255);\n        return this;\n    }\n    public P2 mult(BigInteger scalar)\n    {   byte[] val;\n        if (scalar.Sign < 0) {\n            val = BigInteger.Negate(scalar).ToByteArray();\n            blst_p2_cneg(point, true);\n        } else {\n            val = scalar.ToByteArray();\n        }\n        int len = val.Length;\n        if (val[len-1]==0) len--;\n        blst_p2_mult(point, point, val, (size_t)(len*8));\n        return this;\n    }\n    public P2 cneg(bool flag)   { blst_p2_cneg(point, flag); return this;   }\n    public P2 neg()             { blst_p2_cneg(point, true); return this;   }\n    public P2 add(P2 a)\n    {   blst_p2_add_or_double(point, point, a.point); return this;          }\n    public P2 add(P2_Affine a)\n    {   blst_p2_add_or_double_affine(point, point, a.point); return this;   }\n    public P2 dbl()\n    {   blst_p2_double(point, point); return this;                          }\n\n    public static P2 generator()\n    {   var ret = new P2(true);\n        Marshal.Copy(blst_p2_generator(), ret.point, 0, ret.point.Length);\n        return ret;\n    }\n}\n\npublic static P2 G2() { return P2.generator(); }\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void 
blst_aggregated_in_g2([Out] long[] fp12, [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_aggregate_pk_in_g2([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_mul_n_aggregate_pk_in_g2([In, Out] long[] fp12,\n                                [In] long[] pk, [In] long[] sig,\n                                [In] byte[] scalar, size_t nbits,\n                                [In] byte[] msg, size_t msg_len,\n                                [In] byte[] aug, size_t aug_len);\n\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_fp12_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_miller_loop([Out] long[] fp12, [In] long[] q,\n                                                       [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_is_one([In] long[] fp12);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_is_equal([In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_fp12_sqr([Out] long[] ret, [In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_fp12_mul([Out] long[] ret, [In] long[] a,\n                                                   [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_final_exp([Out] long[] ret, [In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic 
extern bool blst_fp12_finalverify([In] long[] a, [In] long[] b);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_fp12_one();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_fp12_in_group([In] long[] a);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_bendian_from_fp12([Out] byte[] ret, [In] long[] a);\n\npublic struct PT {\n    internal readonly long[] fp12;\n\n    private static readonly int sz = (int)blst_fp12_sizeof()/sizeof(long);\n\n    internal PT(bool _)     { fp12 = new long[sz]; }\n    private PT(PT orig)     { fp12 = (long[])orig.fp12.Clone(); }\n\n    public PT(P1_Affine p) : this(true)\n    {   blst_aggregated_in_g1(fp12, p.point);   }\n    public PT(P1 p) : this(true)\n    {   blst_aggregated_in_g1(fp12, (new P1_Affine(p)).point);   }\n    public PT(P2_Affine q) : this(true)\n    {   blst_aggregated_in_g2(fp12, q.point);   }\n    public PT(P2 q) : this(true)\n    {   blst_aggregated_in_g2(fp12, (new P2_Affine(q)).point);   }\n    public PT(P2_Affine q, P1_Affine p) : this(true)\n    {   blst_miller_loop(fp12, q.point, p.point);   }\n    public PT(P1_Affine p, P2_Affine q) : this(q, p) {}\n    public PT(P2 q, P1 p) : this(true)\n    {   blst_miller_loop(fp12, (new P2_Affine(q)).point,\n                               (new P1_Affine(p)).point);\n    }\n    public PT(P1 p, P2 q) : this(q, p) {}\n\n    public PT dup()         { return new PT(this); }\n    public bool is_one()    { return blst_fp12_is_one(fp12); }\n    public bool is_equal(PT p)\n    {   return blst_fp12_is_equal(fp12, p.fp12);   }\n    public PT sqr()         { blst_fp12_sqr(fp12, fp12);         return this; }\n    public PT mul(PT p)     { blst_fp12_mul(fp12, fp12, p.fp12); return this; }\n    public PT final_exp()   { blst_final_exp(fp12, fp12);        return this; }\n    public bool in_group()  { return blst_fp12_in_group(fp12); }\n    
public byte[] to_bendian()\n    {   byte[] ret = new byte[12*P1_COMPRESSED_SZ];\n        blst_bendian_from_fp12(ret, fp12);\n        return ret;\n    }\n\n    public static bool finalverify(PT gt1, PT gt2)\n    {   return blst_fp12_finalverify(gt1.fp12, gt2.fp12);   }\n\n    public static PT one()\n    {   var ret = new PT(true);\n        Marshal.Copy(blst_fp12_one(), ret.fp12, 0, ret.fp12.Length);\n        return ret;\n    }\n}\n\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern size_t blst_pairing_sizeof();\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_pairing_init([In, Out] long[] ctx, bool hash_or_encode,\n                                             [In] ref long dst, size_t dst_len);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern void blst_pairing_commit([In, Out] long[] ctx);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern ERROR blst_pairing_merge([In, Out] long[] ctx, [In] long[] ctx1);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern bool blst_pairing_finalverify([In] long[] ctx, [In] long[] sig);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern\nvoid blst_pairing_raw_aggregate([In, Out] long[] ctx, [In] long[] q,\n                                                      [In] long[] p);\n[DllImport(\"blst.dll\", CallingConvention = CallingConvention.Cdecl)]\nstatic extern IntPtr blst_pairing_as_fp12([In] long[] ctx);\n\npublic struct Pairing {\n    private readonly long[] ctx;\n\n    private static readonly int sz = (int)blst_pairing_sizeof()/sizeof(long);\n\n    public Pairing(bool hash_or_encode=false, string DST=\"\")\n    {\n        byte[] dst = Encoding.UTF8.GetBytes(DST);\n        int dst_len = dst.Length;\n        int add_len = dst_len!=0 ? 
(dst_len+sizeof(long)-1)/sizeof(long) : 1;\n        Array.Resize(ref dst, add_len*sizeof(long));\n\n        ctx = new long[sz+add_len];\n\n        for (int i=0; i<add_len; i++)\n            ctx[sz+i] = BitConverter.ToInt64(dst, i*sizeof(long));\n\n        GCHandle h = GCHandle.Alloc(ctx, GCHandleType.Pinned);\n        blst_pairing_init(ctx, hash_or_encode, ref ctx[sz], (size_t)dst_len);\n        h.Free();\n    }\n\n    public ERROR aggregate(P1_Affine pk, Nullable<P2_Affine> sig,\n                                         byte[] msg, byte[] aug=null)\n    {   return blst_pairing_aggregate_pk_in_g1(ctx, pk.point,\n                                sig.HasValue ? sig.Value.point : null,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n    public ERROR aggregate(P2_Affine pk, Nullable<P1_Affine> sig,\n                                         byte[] msg, byte[] aug=null)\n    {   return blst_pairing_aggregate_pk_in_g2(ctx, pk.point,\n                                sig.HasValue ? sig.Value.point : null,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n    public ERROR mul_n_aggregate(P2_Affine pk, P1_Affine sig,\n                                               byte[] scalar, int nbits,\n                                               byte[] msg, byte[] aug=null)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g2(ctx, pk.point, sig.point,\n                                scalar, (size_t)nbits,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? 
aug.Length : 0));\n    }\n    public ERROR mul_n_aggregate(P1_Affine pk, P2_Affine sig,\n                                               byte[] scalar, int nbits,\n                                               byte[] msg, byte[] aug=null)\n    {   return blst_pairing_mul_n_aggregate_pk_in_g1(ctx, pk.point, sig.point,\n                                scalar, (size_t)nbits,\n                                msg, (size_t)msg.Length,\n                                aug, (size_t)(aug!=null ? aug.Length : 0));\n    }\n\n    public void commit()    { blst_pairing_commit(ctx); }\n    public void merge(Pairing a)\n    {   var err = blst_pairing_merge(ctx, a.ctx);\n        if (err != ERROR.SUCCESS)\n            throw new Exception(err);\n    }\n    public bool finalverify(PT sig=new PT())\n    {   return blst_pairing_finalverify(ctx, sig.fp12);   }\n\n    public void raw_aggregate(P2_Affine q, P1_Affine p)\n    {   blst_pairing_raw_aggregate(ctx, q.point, p.point);   }\n    public void raw_aggregate(P1_Affine p, P2_Affine q)\n    {   raw_aggregate(q, p);   }\n    public void raw_aggregate(P2 q, P1 p)\n    {   blst_pairing_raw_aggregate(ctx, (new P2_Affine(q)).point,\n                                        (new P1_Affine(p)).point);\n    }\n    public void raw_aggregate(P1 p, P2 q)\n    {   raw_aggregate(q, p);   }\n    public PT as_fp12()\n    {   var ret = new PT(true);\n        GCHandle h = GCHandle.Alloc(ctx, GCHandleType.Pinned);\n        Marshal.Copy(blst_pairing_as_fp12(ctx), ret.fp12, 0, ret.fp12.Length);\n        h.Free();\n        return ret;\n    }\n}\n}}\n"
  },
  {
    "path": "bindings/go/README.md",
    "content": "# blst [![Lint Status](https://github.com/supranational/blst/workflows/golang-lint/badge.svg)](https://github.com/supranational/blst/actions/workflows/golang-lint.yml)\n\nThe `blst` package provides a Go interface to the blst BLS12-381 signature library.\n\n## Build\nThe build process consists of two steps, code generation followed by compilation.\n\n```\n./generate.py # Optional - only required if making code changes\ngo build\ngo test\n```\n\nThe generate.py script is used to generate both min-pk and min-sig variants of the binding from a common code base. It consumes the `*.tgo` files along with `blst_minpk_test.go` and produces `blst.go` and `blst_minsig_test.go`. The .tgo files can be treated as if they were .go files, including the use of gofmt and goimports. The generate script will filter out extra imports while processing and automatically run goimports on the final blst.go file.\n\nAfter running generate.py, <nobr>`go build`</nobr> and <nobr>`go test`</nobr> can be run as usual. Cgo will compile `cgo_server.c`, which includes the required C implementation files, and `cgo_assembly.S`, which includes appropriate pre-generated assembly code for the platform.\n\n#### Caveats\n\nIf the test or target application crashes with an \"illegal instruction\" exception [after copying to an older system], rebuild with `CGO_CFLAGS` environment variable set to <nobr>`-O2 -D__BLST_PORTABLE__`</nobr>. Don't forget <nobr>`-O2`</nobr>!\n\nOn Windows the C compiler invoked by cgo, one denoted in `go env CC` output, has to target [MinGW](https://www.mingw-w64.org/). Verify with `<go-env-CC-output> -dM -E -x c nul: | findstr \"MINGW64\"`.\n\nIf you're cross-compiling, you have to set `CC` environment variable to the target C cross-compiler and `CGO_ENABLED` to 1. 
For example, to compile the test program for ARM:\n```\nenv GOARCH=arm CC=arm-linux-gnueabi-gcc CGO_ENABLED=1 go test -c\n```\n\n## Usage\nThere are two primary modes of operation that can be chosen based on type definitions in the application.\n\nFor minimal-pubkey-size operations the application would define core types as:\n```\ntype PublicKey = blst.P1Affine\ntype Signature = blst.P2Affine\ntype AggregateSignature = blst.P2Aggregate\ntype AggregatePublicKey = blst.P1Aggregate\n```\n\nFor minimal-signature-size operations:\n```\ntype PublicKey = blst.P2Affine\ntype Signature = blst.P1Affine\ntype AggregateSignature = blst.P1Aggregate\ntype AggregatePublicKey = blst.P2Aggregate\n```\n\nA complete example for generating a key, signing a message, and verifying the message:\n```\npackage main\n\nimport (\n\t\"crypto/rand\"\n\t\"fmt\"\n\n\tblst \"github.com/supranational/blst/bindings/go\"\n)\n\ntype PublicKey = blst.P1Affine\ntype Signature = blst.P2Affine\ntype AggregateSignature = blst.P2Aggregate\ntype AggregatePublicKey = blst.P1Aggregate\n\nfunc main() {\n\tvar ikm [32]byte\n\t_, _ = rand.Read(ikm[:])\n\tsk := blst.KeyGen(ikm[:])\n\tpk := new(PublicKey).From(sk)\n\n\tvar dst = []byte(\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\")\n\tmsg := []byte(\"hello foo\")\n\tsig := new(Signature).Sign(sk, msg, dst)\n\n\tif !sig.Verify(true, pk, true, msg, dst) {\n\t\tfmt.Println(\"ERROR: Invalid!\")\n\t} else {\n\t\tfmt.Println(\"Valid!\")\n\t}\n}\n```\n\nSee the tests for further examples of usage.\n\n## Core Methods\n\n### SecretKey Methods\n- `KeyGen(ikm []byte, optional ...[]byte) *SecretKey` - Derive the secret key scalar from secret input key material, optionally application-specific\n- `Serialize() []byte` - Serialize the secret key to bytes\n- `Deserialize(data []byte) *SecretKey` - Deserialize secret key from bytes\n- `Zeroize()` - Securely zero out the secret key\n\n### PublicKey (P1Affine in minimal-pubkey-size) Methods\n- `From(sk *SecretKey) *PublicKey` - 
Derive public key from secret key\n- `Compress() []byte` - Serialize public key to compressed format\n- `Uncompress(data []byte) *PublicKey` - Decompress public key from bytes\n- `Serialize() []byte` - Serialize public key to uncompressed format\n- `Deserialize(data []byte) *PublicKey` - Deserialize public key from bytes\n\n### Signature (P2Affine in minimal-pubkey-size) Methods\n- `Sign(sk *SecretKey, msg []byte, dst []byte, ...interface{}) *Signature` - Sign a message\n- `Compress() []byte` - Serialize signature to compressed format\n- `Uncompress(data []byte) *Signature` - Decompress signature from bytes\n- `BatchUncompress(compressedSigs [][]byte) []*Signature` - Efficiently uncompress multiple signatures\n- `Serialize() []byte` - Serialize signature to uncompressed format\n- `Deserialize(data []byte) *Signature` - Deserialize signature from bytes\n- `Verify(sigCheck bool, pk *PublicKey, pkCheck bool, msg []byte, dst []byte, ...interface{}) bool` - Verify a signature\n- `VerifyCompressed(sig []byte, sigCheck bool, pk []byte, msgCheck bool, msg []byte, dst []byte, ...interface{}) bool` - Verify a serialized signature in compressed format\n- `AggregateVerify(sigCheck bool, pks []*PublicKey, msgCheck bool, msgs [][]byte, dst []byte) bool` - Verify an aggregated signature for multiple messages\n- `AggregateVerifyCompressed(sig []byte, sigCheck bool, pks [][]byte, msgCheck bool, msgs [][]byte, dst []byte) bool` - Verify an aggregated serialized signature in compressed format\n- `FastAggregateVerify(sigCheck bool, pks []*PublicKey, msg []byte, dst []byte) bool` - Fast verify for same message\n- `MultipleAggregateVerify(sigs []*Signature, sigCheck bool, pks []*PublicKey, msgCheck bool, msgs [][]byte, dst []byte, randFn func(*Scalar), randBits int) bool` - Verify multiple signatures\n\n### Aggregate Methods\n- `AggregatePublicKey.Aggregate(pks []*PublicKey, check bool)` - Aggregate multiple public keys\n- `AggregateSignature.Aggregate(sigs []*Signature, check bool)` 
- Aggregate multiple signatures\n- `AggregateSignature.AggregateCompressed(compressedSigs [][]byte, check bool)` - Aggregate multiple serialized signatures in compressed format\n- `AggregatePublicKey.ToAffine() *PublicKey` - Convert aggregate to affine form\n- `AggregateSignature.ToAffine() *Signature` - Convert aggregate to affine form\n\n## Utility Functions\n- `HashToG1(msg []byte, dst []byte, optional... []byte) *P1` - Hash message [with optional augmentation] to G1 point\n- `HashToG2(msg []byte, dst []byte, optional... []byte) *P2` - Hash message [with optional augmentation] to G2 point\n- `P1Generator() *P1` - Get G1 generator point\n- `P2Generator() *P2` - Get G2 generator point\n- `Uniq(msgs [][]byte)` - Check messages for uniqueness\n- `SetMaxProcs(procs int)` - Set maximum number of threads for parallel operations\n"
  },
  {
    "path": "bindings/go/blst.go",
    "content": "// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n// DO NOT MODIFY THIS FILE!!\n// The file is generated from *.tgo by generate.py\n// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\npackage blst\n\n// #cgo CFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../../build -I${SRCDIR}/../../src -D__BLST_CGO__ -fno-builtin-memcpy -fno-builtin-memset\n// #cgo amd64 CFLAGS: -D__ADX__ -mno-avx\n// // no-asm 64-bit platforms from https://go.dev/doc/install/source\n// #cgo loong64 mips64 mips64le ppc64 ppc64le riscv64 s390x CFLAGS: -D__BLST_NO_ASM__\n//\n// #include \"blst.h\"\n//\n// #if defined(__x86_64__) && (defined(__unix__) || defined(__APPLE__))\n// # include <signal.h>\n// # include <unistd.h>\n// static void handler(int signum)\n// {   ssize_t n = write(2, \"Caught SIGILL in blst_cgo_init, \"\n//                          \"consult <blst>/bindings/go/README.md.\\n\", 70);\n//     _exit(128+SIGILL);\n//     (void)n;\n// }\n// __attribute__((constructor)) static void blst_cgo_init()\n// {   blst_fp temp = { 0 };\n//     struct sigaction act = { handler }, oact;\n//     sigaction(SIGILL, &act, &oact);\n//     blst_fp_sqr(&temp, &temp);\n//     sigaction(SIGILL, &oact, NULL);\n// }\n// #endif\n//\n// static void go_pairing_init(blst_pairing *new_ctx, bool hash_or_encode,\n//                             const byte *DST, size_t DST_len)\n// {   if (DST != NULL) {\n//         byte *dst = (byte*)new_ctx + blst_pairing_sizeof();\n//         for(size_t i = 0; i < DST_len; i++) dst[i] = DST[i];\n//         DST = dst;\n//     }\n//     blst_pairing_init(new_ctx, hash_or_encode, DST, DST_len);\n// }\n// static void go_pairing_as_fp12(blst_fp12 *pt, blst_pairing *ctx)\n// {   *pt = *blst_pairing_as_fp12(ctx);   }\n//\n// static void go_p1slice_to_affine(blst_p1_affine dst[],\n//                                  const 
blst_p1 points[], size_t npoints)\n// {   const blst_p1 *ppoints[2] = { points, NULL };\n//     blst_p1s_to_affine(dst, ppoints, npoints);\n// }\n// static void go_p1slice_add(blst_p1 *dst, const blst_p1_affine points[],\n//                                          size_t npoints)\n// {   const blst_p1_affine *ppoints[2] = { points, NULL };\n//     blst_p1s_add(dst, ppoints, npoints);\n// }\n// static void go_p2slice_to_affine(blst_p2_affine dst[],\n//                                  const blst_p2 points[], size_t npoints)\n// {   const blst_p2 *ppoints[2] = { points, NULL };\n//     blst_p2s_to_affine(dst, ppoints, npoints);\n// }\n// static void go_p2slice_add(blst_p2 *dst, const blst_p2_affine points[],\n//                                          size_t npoints)\n// {   const blst_p2_affine *ppoints[2] = { points, NULL };\n//     blst_p2s_add(dst, ppoints, npoints);\n// }\n//\n// static void go_p1_mult_n_acc(blst_p1 *acc, const blst_fp *x, bool affine,\n//                                            const byte *scalar, size_t nbits)\n// {   blst_p1 m[1];\n//     const void *p = x;\n//     if (p == NULL)\n//         p = blst_p1_generator();\n//     else if (affine)\n//         blst_p1_from_affine(m, p), p = m;\n//     blst_p1_mult(m, p, scalar, nbits);\n//     blst_p1_add_or_double(acc, acc, m);\n// }\n// static void go_p2_mult_n_acc(blst_p2 *acc, const blst_fp2 *x, bool affine,\n//                                            const byte *scalar, size_t nbits)\n// {   blst_p2 m[1];\n//     const void *p = x;\n//     if (p == NULL)\n//         p = blst_p2_generator();\n//     else if (affine)\n//         blst_p2_from_affine(m, p), p = m;\n//     blst_p2_mult(m, p, scalar, nbits);\n//     blst_p2_add_or_double(acc, acc, m);\n// }\n//\n// static void go_p1_sub_assign(blst_p1 *a, const blst_fp *x, bool affine)\n// {   blst_p1 minus_b;\n//     if (affine)\n//         blst_p1_from_affine(&minus_b, (const blst_p1_affine*)x);\n//     else\n//         minus_b = *(const 
blst_p1*)x;\n//     blst_p1_cneg(&minus_b, 1);\n//     blst_p1_add_or_double(a, a, &minus_b);\n// }\n//\n// static void go_p2_sub_assign(blst_p2 *a, const blst_fp2 *x, bool affine)\n// {   blst_p2 minus_b;\n//     if (affine)\n//         blst_p2_from_affine(&minus_b, (const blst_p2_affine*)x);\n//     else\n//         minus_b = *(const blst_p2*)x;\n//     blst_p2_cneg(&minus_b, 1);\n//     blst_p2_add_or_double(a, a, &minus_b);\n// }\n//\n// static bool go_scalar_from_bendian(blst_scalar *ret, const byte *in)\n// {   blst_scalar_from_bendian(ret, in);\n//     return blst_sk_check(ret);\n// }\n// static bool go_hash_to_scalar(blst_scalar *ret,\n//                               const byte *msg, size_t msg_len,\n//                               const byte *DST, size_t DST_len)\n// {   byte elem[48];\n//     blst_expand_message_xmd(elem, sizeof(elem), msg, msg_len, DST, DST_len);\n//     return blst_scalar_from_be_bytes(ret, elem, sizeof(elem));\n// }\n// static void go_miller_loop_n(blst_fp12 *dst, const blst_p2_affine Q[],\n//                                              const blst_p1_affine P[],\n//                                              size_t npoints, bool acc)\n// {   const blst_p2_affine *Qs[2] = { Q, NULL };\n//     const blst_p1_affine *Ps[2] = { P, NULL };\n//     if (acc) {\n//         blst_fp12 tmp;\n//         blst_miller_loop_n(&tmp, Qs, Ps, npoints);\n//         blst_fp12_mul(dst, dst, &tmp);\n//     } else {\n//         blst_miller_loop_n(dst, Qs, Ps, npoints);\n//     }\n// }\n// static void go_fp12slice_mul(blst_fp12 *dst, const blst_fp12 in[], size_t n)\n// {   size_t i;\n//     blst_fp12_mul(dst, &in[0], &in[1]);\n//     for (i = 2; i < n; i++)\n//         blst_fp12_mul(dst, dst, &in[i]);\n// }\n// static bool go_p1_affine_validate(const blst_p1_affine *p, bool infcheck)\n// {   if (infcheck && blst_p1_affine_is_inf(p))\n//         return 0;\n//     return blst_p1_affine_in_g1(p);\n// }\n// static bool go_p2_affine_validate(const 
blst_p2_affine *p, bool infcheck)\n// {   if (infcheck && blst_p2_affine_is_inf(p))\n//         return 0;\n//     return blst_p2_affine_in_g2(p);\n// }\nimport \"C\"\n\nimport (\n\t\"fmt\"\n\t\"math/bits\"\n\t\"runtime\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"unsafe\"\n)\n\nconst BLST_SCALAR_BYTES = 256 / 8\nconst BLST_FP_BYTES = 384 / 8\nconst BLST_P1_COMPRESS_BYTES = BLST_FP_BYTES\nconst BLST_P1_SERIALIZE_BYTES = BLST_FP_BYTES * 2\nconst BLST_P2_COMPRESS_BYTES = BLST_FP_BYTES * 2\nconst BLST_P2_SERIALIZE_BYTES = BLST_FP_BYTES * 4\n\ntype Scalar struct{ cgo C.blst_scalar }\ntype Fp struct{ cgo C.blst_fp }\ntype Fp2 struct{ cgo C.blst_fp2 }\ntype Fp6 = C.blst_fp6\ntype Fp12 struct{ cgo C.blst_fp12 }\ntype P1 struct{ cgo C.blst_p1 }\ntype P2 struct{ cgo C.blst_p2 }\ntype P1Affine struct{ cgo C.blst_p1_affine }\ntype P2Affine struct{ cgo C.blst_p2_affine }\ntype Message = []byte\ntype Pairing = []C.blst_pairing\ntype SecretKey = Scalar\ntype P1s []P1\ntype P2s []P2\ntype P1Affines []P1Affine\ntype P2Affines []P2Affine\n\n//\n// Configuration\n//\n\nvar maxProcs = initMaxProcs()\n\nfunc initMaxProcs() int {\n\tmaxProcs := runtime.GOMAXPROCS(0)\n\tvar version float32\n\t_, err := fmt.Sscanf(runtime.Version(), \"go%f\", &version)\n\tif err != nil || version < 1.14 {\n\t\t// be cooperative and leave one processor for the application\n\t\tmaxProcs -= 1\n\t}\n\tif maxProcs <= 0 {\n\t\tmaxProcs = 1\n\t}\n\treturn maxProcs\n}\n\nfunc SetMaxProcs(procs int) {\n\tif procs <= 0 {\n\t\tprocs = 1\n\t}\n\tmaxProcs = procs\n}\n\nfunc numThreads(maxThreads int) int {\n\tnumThreads := maxProcs\n\n\t// take into consideration the possility that application reduced\n\t// GOMAXPROCS after |maxProcs| was initialized\n\tnumProcs := runtime.GOMAXPROCS(0)\n\tif maxProcs > numProcs {\n\t\tnumThreads = numProcs\n\t}\n\n\tif maxThreads > 0 && numThreads > maxThreads {\n\t\treturn maxThreads\n\t}\n\treturn numThreads\n}\n\nvar cgo_pairingSizeOf = C.blst_pairing_sizeof()\nvar cgo_p1Generator = 
P1{*C.blst_p1_generator()}\nvar cgo_p2Generator = P2{*C.blst_p2_generator()}\nvar cgo_fp12One = Fp12{*C.blst_fp12_one()}\n\n// Secret key\nfunc (sk *SecretKey) Zeroize() {\n\tvar zero SecretKey\n\t*sk = zero\n}\n\nfunc KeyGen(ikm []byte, optional ...[]byte) *SecretKey {\n\tvar sk SecretKey\n\tvar info []byte\n\tif len(optional) > 0 {\n\t\tinfo = optional[0]\n\t}\n\tif len(ikm) < 32 {\n\t\treturn nil\n\t}\n\tC.blst_keygen(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n\t\tptrOrNil(info), C.size_t(len(info)))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\nfunc KeyGenV3(ikm []byte, optional ...[]byte) *SecretKey {\n\tif len(ikm) < 32 {\n\t\treturn nil\n\t}\n\tvar sk SecretKey\n\tvar info []byte\n\tif len(optional) > 0 {\n\t\tinfo = optional[0]\n\t}\n\tC.blst_keygen_v3(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n\t\tptrOrNil(info), C.size_t(len(info)))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\nfunc KeyGenV45(ikm []byte, salt []byte, optional ...[]byte) *SecretKey {\n\tif len(ikm) < 32 {\n\t\treturn nil\n\t}\n\tvar sk SecretKey\n\tvar info []byte\n\tif len(optional) > 0 {\n\t\tinfo = optional[0]\n\t}\n\tC.blst_keygen_v4_5(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n\t\t(*C.byte)(&salt[0]), C.size_t(len(salt)),\n\t\tptrOrNil(info), C.size_t(len(info)))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\nfunc KeyGenV5(ikm []byte, salt []byte, optional ...[]byte) *SecretKey {\n\tif len(ikm) < 32 {\n\t\treturn nil\n\t}\n\tvar sk SecretKey\n\tvar info 
[]byte\n\tif len(optional) > 0 {\n\t\tinfo = optional[0]\n\t}\n\tsaltLen := len(salt)\n\tif saltLen == 0 {\n\t\tsalt = []byte{0}\n\t}\n\tC.blst_keygen_v5(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n\t\t(*C.byte)(&salt[0]), C.size_t(saltLen),\n\t\tptrOrNil(info), C.size_t(len(info)))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\nfunc DeriveMasterEip2333(ikm []byte) *SecretKey {\n\tif len(ikm) < 32 {\n\t\treturn nil\n\t}\n\tvar sk SecretKey\n\tC.blst_derive_master_eip2333(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\nfunc (master *SecretKey) DeriveChildEip2333(child_index uint32) *SecretKey {\n\tvar sk SecretKey\n\tC.blst_derive_child_eip2333(&sk.cgo, &master.cgo, C.uint(child_index))\n\t// Postponing secret key zeroing till garbage collection can be too\n\t// late to be effective, but every little bit helps...\n\truntime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n\treturn &sk\n}\n\n// Pairing\nfunc pairingSizeOf(DST_len C.size_t) int {\n\treturn int((cgo_pairingSizeOf + DST_len + 7) / 8)\n}\n\nfunc PairingCtx(hash_or_encode bool, DST []byte) Pairing {\n\tDST_len := C.size_t(len(DST))\n\tctx := make([]C.blst_pairing, pairingSizeOf(DST_len))\n\tC.go_pairing_init(&ctx[0], C.bool(hash_or_encode), ptrOrNil(DST), DST_len)\n\treturn ctx\n}\n\nfunc PairingCommit(ctx Pairing) {\n\tC.blst_pairing_commit(&ctx[0])\n}\n\nfunc PairingMerge(ctx Pairing, ctx1 Pairing) int {\n\tr := C.blst_pairing_merge(&ctx[0], &ctx1[0])\n\treturn int(r)\n}\n\nfunc PairingFinalVerify(ctx Pairing, optional ...*Fp12) bool {\n\tvar gtsig *Fp12\n\tif len(optional) > 0 {\n\t\tgtsig = 
optional[0]\n\t}\n\treturn bool(C.blst_pairing_finalverify(&ctx[0], gtsig.asPtr()))\n}\n\nfunc PairingRawAggregate(ctx Pairing, q *P2Affine, p *P1Affine) {\n\tC.blst_pairing_raw_aggregate(&ctx[0], &q.cgo, &p.cgo)\n}\n\nfunc PairingAsFp12(ctx Pairing) *Fp12 {\n\tvar pt Fp12\n\tC.go_pairing_as_fp12(&pt.cgo, &ctx[0])\n\treturn &pt\n}\n\nfunc Fp12One() Fp12 {\n\treturn cgo_fp12One\n}\n\nfunc Fp12FinalVerify(pt1 *Fp12, pt2 *Fp12) bool {\n\treturn bool(C.blst_fp12_finalverify(&pt1.cgo, &pt2.cgo))\n}\n\nfunc Fp12MillerLoop(q *P2Affine, p *P1Affine) *Fp12 {\n\tvar pt Fp12\n\tC.blst_miller_loop(&pt.cgo, &q.cgo, &p.cgo)\n\treturn &pt\n}\n\nfunc Fp12MillerLoopN(qs []P2Affine, ps []P1Affine) *Fp12 {\n\tif len(qs) != len(ps) || len(qs) == 0 {\n\t\tpanic(\"inputs' lengths mismatch\")\n\t}\n\n\tnElems := uint32(len(qs))\n\tnThreads := uint32(maxProcs)\n\n\tif nThreads == 1 || nElems == 1 {\n\t\tvar pt Fp12\n\t\tC.go_miller_loop_n(&pt.cgo, &qs[0].cgo, &ps[0].cgo, C.size_t(nElems), false)\n\t\treturn &pt\n\t}\n\n\tstride := (nElems + nThreads - 1) / nThreads\n\tif stride > 16 {\n\t\tstride = 16\n\t}\n\n\tstrides := (nElems + stride - 1) / stride\n\tif nThreads > strides {\n\t\tnThreads = strides\n\t}\n\n\tmsgsCh := make(chan Fp12, nThreads)\n\tcurElem := uint32(0)\n\n\tfor tid := uint32(0); tid < nThreads; tid++ {\n\t\tgo func() {\n\t\t\tacc := Fp12One()\n\t\t\tfirst := true\n\t\t\tfor {\n\t\t\t\twork := atomic.AddUint32(&curElem, stride) - stride\n\t\t\t\tif work >= nElems {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tn := nElems - work\n\t\t\t\tif n > stride {\n\t\t\t\t\tn = stride\n\t\t\t\t}\n\t\t\t\tC.go_miller_loop_n(&acc.cgo, &qs[work].cgo, &ps[work].cgo, C.size_t(n),\n\t\t\t\t\tC.bool(!first))\n\t\t\t\tfirst = false\n\t\t\t}\n\t\t\tmsgsCh <- acc\n\t\t}()\n\t}\n\n\tvar ret = make([]Fp12, nThreads)\n\tfor i := range ret {\n\t\tret[i] = <-msgsCh\n\t}\n\n\tvar pt Fp12\n\tC.go_fp12slice_mul(&pt.cgo, &ret[0].cgo, C.size_t(nThreads))\n\treturn &pt\n}\n\nfunc (pt *Fp12) MulAssign(p *Fp12) 
{\n\tC.blst_fp12_mul(&pt.cgo, &pt.cgo, &p.cgo)\n}\n\nfunc (pt *Fp12) FinalExp() {\n\tC.blst_final_exp(&pt.cgo, &pt.cgo)\n}\n\nfunc (pt *Fp12) InGroup() bool {\n\treturn bool(C.blst_fp12_in_group(&pt.cgo))\n}\n\nfunc (pt *Fp12) ToBendian() []byte {\n\tvar out [BLST_FP_BYTES * 12]byte\n\tC.blst_bendian_from_fp12((*C.byte)(&out[0]), &pt.cgo)\n\treturn out[:]\n}\n\nfunc (pt1 *Fp12) Equals(pt2 *Fp12) bool {\n\treturn *pt1 == *pt2\n}\n\nfunc (pt *Fp12) asPtr() *C.blst_fp12 {\n\tif pt != nil {\n\t\treturn &pt.cgo\n\t}\n\n\treturn nil\n}\n\nfunc ptrOrNil(bytes []byte) *C.byte {\n\tvar ptr *C.byte\n\tif len(bytes) > 0 {\n\t\tptr = (*C.byte)(&bytes[0])\n\t}\n\treturn ptr\n}\n\n//\n// MIN-PK\n//\n\n//\n// PublicKey\n//\n\nfunc (pk *P1Affine) From(s *Scalar) *P1Affine {\n\tC.blst_sk_to_pk2_in_g1(nil, &pk.cgo, &s.cgo)\n\treturn pk\n}\n\nfunc (pk *P1Affine) KeyValidate() bool {\n\treturn bool(C.go_p1_affine_validate(&pk.cgo, true))\n}\n\n// sigInfcheck, check for infinity, is a way to avoid going\n// into resource-consuming verification. Passing 'false' is\n// always cryptographically safe, but application might want\n// to guard against obviously bogus individual[!] 
signatures.\nfunc (sig *P2Affine) SigValidate(sigInfcheck bool) bool {\n\treturn bool(C.go_p2_affine_validate(&sig.cgo, C.bool(sigInfcheck)))\n}\n\n//\n// Sign\n//\n\nfunc (sig *P2Affine) Sign(sk *SecretKey, msg []byte, dst []byte,\n\toptional ...interface{}) *P2Affine {\n\taugSingle, aug, useHash, ok := parseOpts(optional...)\n\tif !ok || len(aug) != 0 {\n\t\treturn nil\n\t}\n\n\tvar q *P2\n\tif useHash {\n\t\tq = HashToG2(msg, dst, augSingle)\n\t} else {\n\t\tq = EncodeToG2(msg, dst, augSingle)\n\t}\n\tC.blst_sign_pk2_in_g1(nil, &sig.cgo, &q.cgo, &sk.cgo)\n\treturn sig\n}\n\n//\n// Signature\n//\n\n// Functions to return a signature and public key+augmentation tuple.\n// This enables point decompression (if needed) to happen in parallel.\ntype sigGetterP2 func() *P2Affine\ntype pkGetterP1 func(i uint32, temp *P1Affine) (*P1Affine, []byte)\n\n// Single verify with decompressed pk\nfunc (sig *P2Affine) Verify(sigGroupcheck bool, pk *P1Affine, pkValidate bool,\n\tmsg Message, dst []byte,\n\toptional ...interface{}) bool { // useHash bool, aug []byte\n\n\taug, _, useHash, ok := parseOpts(optional...)\n\tif !ok {\n\t\treturn false\n\t}\n\treturn sig.AggregateVerify(sigGroupcheck, []*P1Affine{pk}, pkValidate,\n\t\t[]Message{msg}, dst, useHash, [][]byte{aug})\n}\n\n// Single verify with compressed pk\n// Uses a dummy signature to get the correct type\nfunc (dummy *P2Affine) VerifyCompressed(sig []byte, sigGroupcheck bool,\n\tpk []byte, pkValidate bool, msg Message, dst []byte,\n\toptional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n\treturn dummy.AggregateVerifyCompressed(sig, sigGroupcheck,\n\t\t[][]byte{pk}, pkValidate,\n\t\t[]Message{msg}, dst, optional...)\n}\n\n// Aggregate verify with uncompressed signature and public keys\n// Note that checking message uniqueness, if required, is left to the user.\n// Not all signature schemes require it and this keeps the binding minimal\n// and fast. 
Refer to the Uniq function for one method of performing\n// this check.\nfunc (sig *P2Affine) AggregateVerify(sigGroupcheck bool,\n\tpks []*P1Affine, pksVerify bool, msgs []Message, dst []byte,\n\toptional ...interface{}) bool { // useHash bool, augs [][]byte\n\n\t// sanity checks and argument parsing\n\tn := len(pks)\n\tif n == 0 || len(msgs) != n {\n\t\treturn false\n\t}\n\t_, augs, useHash, ok := parseOpts(optional...)\n\tuseAugs := len(augs) != 0\n\tif !ok || (useAugs && len(augs) != n) {\n\t\treturn false\n\t}\n\n\tsigFn := func() *P2Affine {\n\t\treturn sig\n\t}\n\n\tpkFn := func(i uint32, _ *P1Affine) (*P1Affine, []byte) {\n\t\tif useAugs {\n\t\t\treturn pks[i], augs[i]\n\t\t}\n\t\treturn pks[i], nil\n\t}\n\n\treturn coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify,\n\t\tmsgs, dst, useHash)\n}\n\n// Aggregate verify with compressed signature and public keys\n// Uses a dummy signature to get the correct type\nfunc (*P2Affine) AggregateVerifyCompressed(sig []byte, sigGroupcheck bool,\n\tpks [][]byte, pksVerify bool, msgs []Message, dst []byte,\n\toptional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n\t// sanity checks and argument parsing\n\tif len(pks) != len(msgs) {\n\t\treturn false\n\t}\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\tusePksAsAugs := false\n\tif len(optional) > 1 {\n\t\tusePksAsAugs = optional[1]\n\t}\n\n\tsigFn := func() *P2Affine {\n\t\tsigP := new(P2Affine)\n\t\tif sigP.Uncompress(sig) == nil {\n\t\t\treturn nil\n\t\t}\n\t\treturn sigP\n\t}\n\tpkFn := func(i uint32, pk *P1Affine) (*P1Affine, []byte) {\n\t\tbytes := pks[i]\n\t\tif len(bytes) == BLST_P1_SERIALIZE_BYTES && (bytes[0]&0x80) == 0 {\n\t\t\t// Not compressed\n\t\t\tif pk.Deserialize(bytes) == nil {\n\t\t\t\treturn nil, nil\n\t\t\t}\n\t\t} else if len(bytes) == BLST_P1_COMPRESS_BYTES && (bytes[0]&0x80) != 0 {\n\t\t\tif pk.Uncompress(bytes) == nil {\n\t\t\t\treturn nil, nil\n\t\t\t}\n\t\t} else {\n\t\t\treturn nil, 
nil\n\t\t}\n\t\tif usePksAsAugs {\n\t\t\treturn pk, bytes\n\t\t}\n\t\treturn pk, nil\n\t}\n\treturn coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify,\n\t\tmsgs, dst, useHash)\n}\n\nfunc coreAggregateVerifyPkInG1(sigFn sigGetterP2, sigGroupcheck bool,\n\tpkFn pkGetterP1, pkValidate bool, msgs []Message, dst []byte,\n\toptional ...bool) bool { // useHash\n\n\tn := len(msgs)\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\n\tnumCores := runtime.GOMAXPROCS(0)\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next message to process by atomically\n\t// incrementing curItem, process corresponding pk,msg[,aug] tuple and\n\t// repeat until n is exceeded.  The resulting accumulations will be\n\t// fed into the msgsCh channel.\n\tmsgsCh := make(chan Pairing, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\tmutex := sync.Mutex{}\n\n\tmutex.Lock()\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tpairing := PairingCtx(useHash, dst)\n\t\t\tvar temp P1Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t} else if work == 0 && maxProcs == numCores-1 &&\n\t\t\t\t\tnumThreads == maxProcs {\n\t\t\t\t\t// Avoid consuming all cores by waiting until the\n\t\t\t\t\t// main thread has completed its miller loop before\n\t\t\t\t\t// proceeding.\n\t\t\t\t\tmutex.Lock()\n\t\t\t\t\tmutex.Unlock() //nolint:staticcheck\n\t\t\t\t}\n\n\t\t\t\t// Pull Public Key and augmentation blob\n\t\t\t\tcurPk, aug := pkFn(work, &temp)\n\t\t\t\tif curPk == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// Pairing and accumulate\n\t\t\t\tret := PairingAggregatePkInG1(pairing, curPk, pkValidate,\n\t\t\t\t\tnil, false, msgs[work], aug)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 
0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tPairingCommit(pairing)\n\t\t\t\tmsgsCh <- pairing\n\t\t\t} else {\n\t\t\t\tmsgsCh <- nil\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Uncompress and check signature\n\tvar gtsig Fp12\n\tsig := sigFn()\n\tif sig == nil {\n\t\tatomic.StoreInt32(&valid, 0)\n\t}\n\tif atomic.LoadInt32(&valid) > 0 && sigGroupcheck &&\n\t\t!sig.SigValidate(false) {\n\t\tatomic.StoreInt32(&valid, 0)\n\t}\n\tif atomic.LoadInt32(&valid) > 0 {\n\t\tC.blst_aggregated_in_g2(&gtsig.cgo, &sig.cgo)\n\t}\n\tmutex.Unlock()\n\n\t// Accumulate the thread results\n\tvar pairings Pairing\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := <-msgsCh\n\t\tif msg != nil {\n\t\t\tif pairings == nil {\n\t\t\t\tpairings = msg\n\t\t\t} else {\n\t\t\t\tret := PairingMerge(pairings, msg)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || pairings == nil {\n\t\treturn false\n\t}\n\n\treturn PairingFinalVerify(pairings, &gtsig)\n}\n\nfunc CoreVerifyPkInG1(pk *P1Affine, sig *P2Affine, hash_or_encode bool,\n\tmsg Message, dst []byte, optional ...[]byte) int {\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tif runtime.NumGoroutine() < maxProcs {\n\t\tsigFn := func() *P2Affine {\n\t\t\treturn sig\n\t\t}\n\t\tpkFn := func(_ uint32, _ *P1Affine) (*P1Affine, []byte) {\n\t\t\treturn pk, aug\n\t\t}\n\t\tif !coreAggregateVerifyPkInG1(sigFn, true, pkFn, true, []Message{msg},\n\t\t\tdst, hash_or_encode) {\n\t\t\treturn C.BLST_VERIFY_FAIL\n\t\t}\n\t\treturn C.BLST_SUCCESS\n\t}\n\n\treturn int(C.blst_core_verify_pk_in_g1(&pk.cgo, &sig.cgo, C.bool(hash_or_encode),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug))))\n}\n\n// pks are assumed to be verified for proof of 
possession,\n// which implies that they are already group-checked\nfunc (sig *P2Affine) FastAggregateVerify(sigGroupcheck bool,\n\tpks []*P1Affine, msg Message, dst []byte,\n\toptional ...interface{}) bool { // pass-through to Verify\n\tn := len(pks)\n\n\t// TODO: return value for length zero?\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\taggregator := new(P1Aggregate)\n\tif !aggregator.Aggregate(pks, false) {\n\t\treturn false\n\t}\n\tpkAff := aggregator.ToAffine()\n\n\t// Verify\n\treturn sig.Verify(sigGroupcheck, pkAff, false, msg, dst, optional...)\n}\n\nfunc (*P2Affine) MultipleAggregateVerify(sigs []*P2Affine,\n\tsigsGroupcheck bool, pks []*P1Affine, pksVerify bool,\n\tmsgs []Message, dst []byte, randFn func(*Scalar), randBits int,\n\toptional ...interface{}) bool { // useHash\n\n\t// Sanity checks and argument parsing\n\tn := len(pks)\n\tif n == 0 || len(msgs) != n || len(sigs) != n {\n\t\treturn false\n\t}\n\t_, augs, useHash, ok := parseOpts(optional...)\n\tuseAugs := len(augs) != 0\n\tif !ok || (useAugs && len(augs) != n) {\n\t\treturn false\n\t}\n\n\tparamsFn :=\n\t\tfunc(work uint32, _ *P2Affine, _ *P1Affine, rand *Scalar) (\n\t\t\t*P2Affine, *P1Affine, *Scalar, []byte) {\n\t\t\trandFn(rand)\n\t\t\tvar aug []byte\n\t\t\tif useAugs {\n\t\t\t\taug = augs[work]\n\t\t\t}\n\t\t\treturn sigs[work], pks[work], rand, aug\n\t\t}\n\n\treturn multipleAggregateVerifyPkInG1(paramsFn, sigsGroupcheck, pksVerify,\n\t\tmsgs, dst, randBits, useHash)\n}\n\ntype mulAggGetterPkInG1 func(work uint32, sig *P2Affine, pk *P1Affine,\n\trand *Scalar) (*P2Affine, *P1Affine, *Scalar, []byte)\n\nfunc multipleAggregateVerifyPkInG1(paramsFn mulAggGetterPkInG1,\n\tsigsGroupcheck bool, pksVerify bool, msgs []Message,\n\tdst []byte, randBits int,\n\toptional ...bool) bool { // useHash\n\tn := len(msgs)\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next 
message to process by atomically\n\t// incrementing curItem, process corresponding pk,msg[,aug] tuple and\n\t// repeat until n is exceeded.  The resulting accumulations will be\n\t// fed into the msgsCh channel.\n\tmsgsCh := make(chan Pairing, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tpairing := PairingCtx(useHash, dst)\n\t\t\tvar tempRand Scalar\n\t\t\tvar tempPk P1Affine\n\t\t\tvar tempSig P2Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tcurSig, curPk, curRand, aug := paramsFn(work, &tempSig,\n\t\t\t\t\t&tempPk, &tempRand)\n\n\t\t\t\tif PairingMulNAggregatePkInG1(pairing, curPk, pksVerify,\n\t\t\t\t\tcurSig, sigsGroupcheck, curRand,\n\t\t\t\t\trandBits, msgs[work], aug) !=\n\t\t\t\t\tC.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tPairingCommit(pairing)\n\t\t\t\tmsgsCh <- pairing\n\t\t\t} else {\n\t\t\t\tmsgsCh <- nil\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Accumulate the thread results\n\tvar pairings Pairing\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := <-msgsCh\n\t\tif msg != nil {\n\t\t\tif pairings == nil {\n\t\t\t\tpairings = msg\n\t\t\t} else {\n\t\t\t\tret := PairingMerge(pairings, msg)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || pairings == nil {\n\t\treturn false\n\t}\n\n\treturn PairingFinalVerify(pairings, nil)\n}\n\n//\n// Aggregate P2\n//\n\ntype aggGetterP2 func(i uint32, temp *P2Affine) *P2Affine\ntype P2Aggregate struct {\n\tv *P2\n}\n\n// Aggregate uncompressed elements\nfunc (agg *P2Aggregate) Aggregate(elmts []*P2Affine,\n\tgroupcheck 
bool) bool {\n\tif len(elmts) == 0 {\n\t\treturn true\n\t}\n\tgetter := func(i uint32, _ *P2Affine) *P2Affine { return elmts[i] }\n\treturn agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P2Aggregate) AggregateWithRandomness(pointsIf interface{},\n\tscalarsIf interface{}, nbits int, groupcheck bool) bool {\n\tif groupcheck && !P2AffinesValidate(pointsIf) {\n\t\treturn false\n\t}\n\tagg.v = P2AffinesMult(pointsIf, scalarsIf, nbits)\n\treturn true\n}\n\n// Aggregate compressed elements\nfunc (agg *P2Aggregate) AggregateCompressed(elmts [][]byte,\n\tgroupcheck bool) bool {\n\tif len(elmts) == 0 {\n\t\treturn true\n\t}\n\tgetter := func(i uint32, p *P2Affine) *P2Affine {\n\t\tbytes := elmts[i]\n\t\tif p.Uncompress(bytes) == nil {\n\t\t\treturn nil\n\t\t}\n\t\treturn p\n\t}\n\treturn agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P2Aggregate) AddAggregate(other *P2Aggregate) {\n\tif other.v == nil {\n\t\t// do nothing\n\t} else if agg.v == nil {\n\t\tagg.v = other.v\n\t} else {\n\t\tC.blst_p2_add_or_double(&agg.v.cgo, &agg.v.cgo, &other.v.cgo)\n\t}\n}\n\nfunc (agg *P2Aggregate) Add(elmt *P2Affine, groupcheck bool) bool {\n\tif groupcheck && !bool(C.blst_p2_affine_in_g2(&elmt.cgo)) {\n\t\treturn false\n\t}\n\tif agg.v == nil {\n\t\tagg.v = new(P2)\n\t\tC.blst_p2_from_affine(&agg.v.cgo, &elmt.cgo)\n\t} else {\n\t\tC.blst_p2_add_or_double_affine(&agg.v.cgo, &agg.v.cgo, &elmt.cgo)\n\t}\n\treturn true\n}\n\nfunc (agg *P2Aggregate) ToAffine() *P2Affine {\n\tif agg.v == nil {\n\t\treturn new(P2Affine)\n\t}\n\treturn agg.v.ToAffine()\n}\n\nfunc (agg *P2Aggregate) coreAggregate(getter aggGetterP2, groupcheck bool,\n\tn int) bool {\n\n\tif n == 0 {\n\t\treturn true\n\t}\n\t// operations are considered short enough for not to care about\n\t// keeping one core free...\n\tnumThreads := runtime.GOMAXPROCS(0)\n\tif numThreads > n {\n\t\tnumThreads = n\n\t}\n\n\tvalid := int32(1)\n\ttype result struct {\n\t\tagg   *P2\n\t\tempty bool\n\t}\n\tmsgs 
:= make(chan result, numThreads)\n\tcurItem := uint32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfirst := true\n\t\t\tvar agg P2\n\t\t\tvar temp P2Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// Signature validate\n\t\t\t\tcurElmt := getter(work, &temp)\n\t\t\t\tif curElmt == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif groupcheck && !bool(C.blst_p2_affine_in_g2(&curElmt.cgo)) {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif first {\n\t\t\t\t\tC.blst_p2_from_affine(&agg.cgo, &curElmt.cgo)\n\t\t\t\t\tfirst = false\n\t\t\t\t} else {\n\t\t\t\t\tC.blst_p2_add_or_double_affine(&agg.cgo, &agg.cgo, &curElmt.cgo)\n\t\t\t\t}\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif first {\n\t\t\t\tmsgs <- result{nil, true}\n\t\t\t} else if atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tmsgs <- result{&agg, false}\n\t\t\t} else {\n\t\t\t\tmsgs <- result{nil, false}\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Accumulate the thread results\n\tfirst := agg.v == nil\n\tvalidLocal := true\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := <-msgs\n\t\tif !validLocal || msg.empty {\n\t\t\t// do nothing\n\t\t} else if msg.agg == nil {\n\t\t\tvalidLocal = false\n\t\t\t// This should be unnecessary but seems safer\n\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t} else {\n\t\t\tif first {\n\t\t\t\tagg.v = msg.agg\n\t\t\t\tfirst = false\n\t\t\t} else {\n\t\t\t\tC.blst_p2_add_or_double(&agg.v.cgo, &agg.v.cgo, &msg.agg.cgo)\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 {\n\t\tagg.v = nil\n\t\treturn false\n\t}\n\treturn true\n}\n\n//\n// MIN-SIG\n//\n\n//\n// PublicKey\n//\n\nfunc (pk *P2Affine) From(s *Scalar) *P2Affine {\n\tC.blst_sk_to_pk2_in_g2(nil, &pk.cgo, &s.cgo)\n\treturn pk\n}\n\nfunc (pk *P2Affine) 
KeyValidate() bool {\n\treturn bool(C.go_p2_affine_validate(&pk.cgo, true))\n}\n\n// sigInfcheck, check for infinity, is a way to avoid going\n// into resource-consuming verification. Passing 'false' is\n// always cryptographically safe, but application might want\n// to guard against obviously bogus individual[!] signatures.\nfunc (sig *P1Affine) SigValidate(sigInfcheck bool) bool {\n\treturn bool(C.go_p1_affine_validate(&sig.cgo, C.bool(sigInfcheck)))\n}\n\n//\n// Sign\n//\n\nfunc (sig *P1Affine) Sign(sk *SecretKey, msg []byte, dst []byte,\n\toptional ...interface{}) *P1Affine {\n\taugSingle, aug, useHash, ok := parseOpts(optional...)\n\tif !ok || len(aug) != 0 {\n\t\treturn nil\n\t}\n\n\tvar q *P1\n\tif useHash {\n\t\tq = HashToG1(msg, dst, augSingle)\n\t} else {\n\t\tq = EncodeToG1(msg, dst, augSingle)\n\t}\n\tC.blst_sign_pk2_in_g2(nil, &sig.cgo, &q.cgo, &sk.cgo)\n\treturn sig\n}\n\n//\n// Signature\n//\n\n// Functions to return a signature and public key+augmentation tuple.\n// This enables point decompression (if needed) to happen in parallel.\ntype sigGetterP1 func() *P1Affine\ntype pkGetterP2 func(i uint32, temp *P2Affine) (*P2Affine, []byte)\n\n// Single verify with decompressed pk\nfunc (sig *P1Affine) Verify(sigGroupcheck bool, pk *P2Affine, pkValidate bool,\n\tmsg Message, dst []byte,\n\toptional ...interface{}) bool { // useHash bool, aug []byte\n\n\taug, _, useHash, ok := parseOpts(optional...)\n\tif !ok {\n\t\treturn false\n\t}\n\treturn sig.AggregateVerify(sigGroupcheck, []*P2Affine{pk}, pkValidate,\n\t\t[]Message{msg}, dst, useHash, [][]byte{aug})\n}\n\n// Single verify with compressed pk\n// Uses a dummy signature to get the correct type\nfunc (dummy *P1Affine) VerifyCompressed(sig []byte, sigGroupcheck bool,\n\tpk []byte, pkValidate bool, msg Message, dst []byte,\n\toptional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n\treturn dummy.AggregateVerifyCompressed(sig, sigGroupcheck,\n\t\t[][]byte{pk}, pkValidate,\n\t\t[]Message{msg}, dst, 
optional...)\n}\n\n// Aggregate verify with uncompressed signature and public keys\n// Note that checking message uniqueness, if required, is left to the user.\n// Not all signature schemes require it and this keeps the binding minimal\n// and fast. Refer to the Uniq function for one method of performing\n// this check.\nfunc (sig *P1Affine) AggregateVerify(sigGroupcheck bool,\n\tpks []*P2Affine, pksVerify bool, msgs []Message, dst []byte,\n\toptional ...interface{}) bool { // useHash bool, augs [][]byte\n\n\t// sanity checks and argument parsing\n\tn := len(pks)\n\tif n == 0 || len(msgs) != n {\n\t\treturn false\n\t}\n\t_, augs, useHash, ok := parseOpts(optional...)\n\tuseAugs := len(augs) != 0\n\tif !ok || (useAugs && len(augs) != n) {\n\t\treturn false\n\t}\n\n\tsigFn := func() *P1Affine {\n\t\treturn sig\n\t}\n\n\tpkFn := func(i uint32, _ *P2Affine) (*P2Affine, []byte) {\n\t\tif useAugs {\n\t\t\treturn pks[i], augs[i]\n\t\t}\n\t\treturn pks[i], nil\n\t}\n\n\treturn coreAggregateVerifyPkInG2(sigFn, sigGroupcheck, pkFn, pksVerify,\n\t\tmsgs, dst, useHash)\n}\n\n// Aggregate verify with compressed signature and public keys\n// Uses a dummy signature to get the correct type\nfunc (*P1Affine) AggregateVerifyCompressed(sig []byte, sigGroupcheck bool,\n\tpks [][]byte, pksVerify bool, msgs []Message, dst []byte,\n\toptional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n\t// sanity checks and argument parsing\n\tif len(pks) != len(msgs) {\n\t\treturn false\n\t}\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\tusePksAsAugs := false\n\tif len(optional) > 1 {\n\t\tusePksAsAugs = optional[1]\n\t}\n\n\tsigFn := func() *P1Affine {\n\t\tsigP := new(P1Affine)\n\t\tif sigP.Uncompress(sig) == nil {\n\t\t\treturn nil\n\t\t}\n\t\treturn sigP\n\t}\n\tpkFn := func(i uint32, pk *P2Affine) (*P2Affine, []byte) {\n\t\tbytes := pks[i]\n\t\tif len(bytes) == BLST_P2_SERIALIZE_BYTES && (bytes[0]&0x80) == 0 {\n\t\t\t// Not compressed\n\t\t\tif 
pk.Deserialize(bytes) == nil {\n\t\t\t\treturn nil, nil\n\t\t\t}\n\t\t} else if len(bytes) == BLST_P2_COMPRESS_BYTES && (bytes[0]&0x80) != 0 {\n\t\t\tif pk.Uncompress(bytes) == nil {\n\t\t\t\treturn nil, nil\n\t\t\t}\n\t\t} else {\n\t\t\treturn nil, nil\n\t\t}\n\t\tif usePksAsAugs {\n\t\t\treturn pk, bytes\n\t\t}\n\t\treturn pk, nil\n\t}\n\treturn coreAggregateVerifyPkInG2(sigFn, sigGroupcheck, pkFn, pksVerify,\n\t\tmsgs, dst, useHash)\n}\n\nfunc coreAggregateVerifyPkInG2(sigFn sigGetterP1, sigGroupcheck bool,\n\tpkFn pkGetterP2, pkValidate bool, msgs []Message, dst []byte,\n\toptional ...bool) bool { // useHash\n\n\tn := len(msgs)\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\n\tnumCores := runtime.GOMAXPROCS(0)\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next message to process by atomically\n\t// incrementing curItem, process corresponding pk,msg[,aug] tuple and\n\t// repeat until n is exceeded.  
The resulting accumulations will be\n\t// fed into the msgsCh channel.\n\tmsgsCh := make(chan Pairing, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\tmutex := sync.Mutex{}\n\n\tmutex.Lock()\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tpairing := PairingCtx(useHash, dst)\n\t\t\tvar temp P2Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t} else if work == 0 && maxProcs == numCores-1 &&\n\t\t\t\t\tnumThreads == maxProcs {\n\t\t\t\t\t// Avoid consuming all cores by waiting until the\n\t\t\t\t\t// main thread has completed its miller loop before\n\t\t\t\t\t// proceeding.\n\t\t\t\t\tmutex.Lock()\n\t\t\t\t\tmutex.Unlock() //nolint:staticcheck\n\t\t\t\t}\n\n\t\t\t\t// Pull Public Key and augmentation blob\n\t\t\t\tcurPk, aug := pkFn(work, &temp)\n\t\t\t\tif curPk == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// Pairing and accumulate\n\t\t\t\tret := PairingAggregatePkInG2(pairing, curPk, pkValidate,\n\t\t\t\t\tnil, false, msgs[work], aug)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tPairingCommit(pairing)\n\t\t\t\tmsgsCh <- pairing\n\t\t\t} else {\n\t\t\t\tmsgsCh <- nil\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Uncompress and check signature\n\tvar gtsig Fp12\n\tsig := sigFn()\n\tif sig == nil {\n\t\tatomic.StoreInt32(&valid, 0)\n\t}\n\tif atomic.LoadInt32(&valid) > 0 && sigGroupcheck &&\n\t\t!sig.SigValidate(false) {\n\t\tatomic.StoreInt32(&valid, 0)\n\t}\n\tif atomic.LoadInt32(&valid) > 0 {\n\t\tC.blst_aggregated_in_g1(&gtsig.cgo, &sig.cgo)\n\t}\n\tmutex.Unlock()\n\n\t// Accumulate the thread results\n\tvar pairings Pairing\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := 
<-msgsCh\n\t\tif msg != nil {\n\t\t\tif pairings == nil {\n\t\t\t\tpairings = msg\n\t\t\t} else {\n\t\t\t\tret := PairingMerge(pairings, msg)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || pairings == nil {\n\t\treturn false\n\t}\n\n\treturn PairingFinalVerify(pairings, &gtsig)\n}\n\nfunc CoreVerifyPkInG2(pk *P2Affine, sig *P1Affine, hash_or_encode bool,\n\tmsg Message, dst []byte, optional ...[]byte) int {\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tif runtime.NumGoroutine() < maxProcs {\n\t\tsigFn := func() *P1Affine {\n\t\t\treturn sig\n\t\t}\n\t\tpkFn := func(_ uint32, _ *P2Affine) (*P2Affine, []byte) {\n\t\t\treturn pk, aug\n\t\t}\n\t\tif !coreAggregateVerifyPkInG2(sigFn, true, pkFn, true, []Message{msg},\n\t\t\tdst, hash_or_encode) {\n\t\t\treturn C.BLST_VERIFY_FAIL\n\t\t}\n\t\treturn C.BLST_SUCCESS\n\t}\n\n\treturn int(C.blst_core_verify_pk_in_g2(&pk.cgo, &sig.cgo, C.bool(hash_or_encode),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug))))\n}\n\n// pks are assumed to be verified for proof of possession,\n// which implies that they are already group-checked\nfunc (sig *P1Affine) FastAggregateVerify(sigGroupcheck bool,\n\tpks []*P2Affine, msg Message, dst []byte,\n\toptional ...interface{}) bool { // pass-through to Verify\n\tn := len(pks)\n\n\t// TODO: return value for length zero?\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\taggregator := new(P2Aggregate)\n\tif !aggregator.Aggregate(pks, false) {\n\t\treturn false\n\t}\n\tpkAff := aggregator.ToAffine()\n\n\t// Verify\n\treturn sig.Verify(sigGroupcheck, pkAff, false, msg, dst, optional...)\n}\n\nfunc (*P1Affine) MultipleAggregateVerify(sigs []*P1Affine,\n\tsigsGroupcheck bool, pks []*P2Affine, pksVerify bool,\n\tmsgs []Message, dst []byte, randFn func(*Scalar), randBits int,\n\toptional ...interface{}) bool { 
// useHash\n\n\t// Sanity checks and argument parsing\n\tn := len(pks)\n\tif n == 0 || len(msgs) != n || len(sigs) != n {\n\t\treturn false\n\t}\n\t_, augs, useHash, ok := parseOpts(optional...)\n\tuseAugs := len(augs) != 0\n\tif !ok || (useAugs && len(augs) != n) {\n\t\treturn false\n\t}\n\n\tparamsFn :=\n\t\tfunc(work uint32, _ *P1Affine, _ *P2Affine, rand *Scalar) (\n\t\t\t*P1Affine, *P2Affine, *Scalar, []byte) {\n\t\t\trandFn(rand)\n\t\t\tvar aug []byte\n\t\t\tif useAugs {\n\t\t\t\taug = augs[work]\n\t\t\t}\n\t\t\treturn sigs[work], pks[work], rand, aug\n\t\t}\n\n\treturn multipleAggregateVerifyPkInG2(paramsFn, sigsGroupcheck, pksVerify,\n\t\tmsgs, dst, randBits, useHash)\n}\n\ntype mulAggGetterPkInG2 func(work uint32, sig *P1Affine, pk *P2Affine,\n\trand *Scalar) (*P1Affine, *P2Affine, *Scalar, []byte)\n\nfunc multipleAggregateVerifyPkInG2(paramsFn mulAggGetterPkInG2,\n\tsigsGroupcheck bool, pksVerify bool, msgs []Message,\n\tdst []byte, randBits int,\n\toptional ...bool) bool { // useHash\n\tn := len(msgs)\n\tif n == 0 {\n\t\treturn false\n\t}\n\n\tuseHash := true\n\tif len(optional) > 0 {\n\t\tuseHash = optional[0]\n\t}\n\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next message to process by atomically\n\t// incrementing curItem, process corresponding pk,msg[,aug] tuple and\n\t// repeat until n is exceeded.  
The resulting accumulations will be\n\t// fed into the msgsCh channel.\n\tmsgsCh := make(chan Pairing, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tpairing := PairingCtx(useHash, dst)\n\t\t\tvar tempRand Scalar\n\t\t\tvar tempPk P2Affine\n\t\t\tvar tempSig P1Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tcurSig, curPk, curRand, aug := paramsFn(work, &tempSig,\n\t\t\t\t\t&tempPk, &tempRand)\n\n\t\t\t\tif PairingMulNAggregatePkInG2(pairing, curPk, pksVerify,\n\t\t\t\t\tcurSig, sigsGroupcheck, curRand,\n\t\t\t\t\trandBits, msgs[work], aug) !=\n\t\t\t\t\tC.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tPairingCommit(pairing)\n\t\t\t\tmsgsCh <- pairing\n\t\t\t} else {\n\t\t\t\tmsgsCh <- nil\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Accumulate the thread results\n\tvar pairings Pairing\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := <-msgsCh\n\t\tif msg != nil {\n\t\t\tif pairings == nil {\n\t\t\t\tpairings = msg\n\t\t\t} else {\n\t\t\t\tret := PairingMerge(pairings, msg)\n\t\t\t\tif ret != C.BLST_SUCCESS {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || pairings == nil {\n\t\treturn false\n\t}\n\n\treturn PairingFinalVerify(pairings, nil)\n}\n\n//\n// Aggregate P1\n//\n\ntype aggGetterP1 func(i uint32, temp *P1Affine) *P1Affine\ntype P1Aggregate struct {\n\tv *P1\n}\n\n// Aggregate uncompressed elements\nfunc (agg *P1Aggregate) Aggregate(elmts []*P1Affine,\n\tgroupcheck bool) bool {\n\tif len(elmts) == 0 {\n\t\treturn true\n\t}\n\tgetter := func(i uint32, _ *P1Affine) *P1Affine { return elmts[i] }\n\treturn 
agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P1Aggregate) AggregateWithRandomness(pointsIf interface{},\n\tscalarsIf interface{}, nbits int, groupcheck bool) bool {\n\tif groupcheck && !P1AffinesValidate(pointsIf) {\n\t\treturn false\n\t}\n\tagg.v = P1AffinesMult(pointsIf, scalarsIf, nbits)\n\treturn true\n}\n\n// Aggregate compressed elements\nfunc (agg *P1Aggregate) AggregateCompressed(elmts [][]byte,\n\tgroupcheck bool) bool {\n\tif len(elmts) == 0 {\n\t\treturn true\n\t}\n\tgetter := func(i uint32, p *P1Affine) *P1Affine {\n\t\tbytes := elmts[i]\n\t\tif p.Uncompress(bytes) == nil {\n\t\t\treturn nil\n\t\t}\n\t\treturn p\n\t}\n\treturn agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P1Aggregate) AddAggregate(other *P1Aggregate) {\n\tif other.v == nil {\n\t\t// do nothing\n\t} else if agg.v == nil {\n\t\tagg.v = other.v\n\t} else {\n\t\tC.blst_p1_add_or_double(&agg.v.cgo, &agg.v.cgo, &other.v.cgo)\n\t}\n}\n\nfunc (agg *P1Aggregate) Add(elmt *P1Affine, groupcheck bool) bool {\n\tif groupcheck && !bool(C.blst_p1_affine_in_g1(&elmt.cgo)) {\n\t\treturn false\n\t}\n\tif agg.v == nil {\n\t\tagg.v = new(P1)\n\t\tC.blst_p1_from_affine(&agg.v.cgo, &elmt.cgo)\n\t} else {\n\t\tC.blst_p1_add_or_double_affine(&agg.v.cgo, &agg.v.cgo, &elmt.cgo)\n\t}\n\treturn true\n}\n\nfunc (agg *P1Aggregate) ToAffine() *P1Affine {\n\tif agg.v == nil {\n\t\treturn new(P1Affine)\n\t}\n\treturn agg.v.ToAffine()\n}\n\nfunc (agg *P1Aggregate) coreAggregate(getter aggGetterP1, groupcheck bool,\n\tn int) bool {\n\n\tif n == 0 {\n\t\treturn true\n\t}\n\t// operations are considered short enough for not to care about\n\t// keeping one core free...\n\tnumThreads := runtime.GOMAXPROCS(0)\n\tif numThreads > n {\n\t\tnumThreads = n\n\t}\n\n\tvalid := int32(1)\n\ttype result struct {\n\t\tagg   *P1\n\t\tempty bool\n\t}\n\tmsgs := make(chan result, numThreads)\n\tcurItem := uint32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfirst := 
true\n\t\t\tvar agg P1\n\t\t\tvar temp P1Affine\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// Signature validate\n\t\t\t\tcurElmt := getter(work, &temp)\n\t\t\t\tif curElmt == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif groupcheck && !bool(C.blst_p1_affine_in_g1(&curElmt.cgo)) {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif first {\n\t\t\t\t\tC.blst_p1_from_affine(&agg.cgo, &curElmt.cgo)\n\t\t\t\t\tfirst = false\n\t\t\t\t} else {\n\t\t\t\t\tC.blst_p1_add_or_double_affine(&agg.cgo, &agg.cgo, &curElmt.cgo)\n\t\t\t\t}\n\t\t\t\t// application might have some async work to do\n\t\t\t\truntime.Gosched()\n\t\t\t}\n\t\t\tif first {\n\t\t\t\tmsgs <- result{nil, true}\n\t\t\t} else if atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tmsgs <- result{&agg, false}\n\t\t\t} else {\n\t\t\t\tmsgs <- result{nil, false}\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Accumulate the thread results\n\tfirst := agg.v == nil\n\tvalidLocal := true\n\tfor i := 0; i < numThreads; i++ {\n\t\tmsg := <-msgs\n\t\tif !validLocal || msg.empty {\n\t\t\t// do nothing\n\t\t} else if msg.agg == nil {\n\t\t\tvalidLocal = false\n\t\t\t// This should be unnecessary but seems safer\n\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t} else {\n\t\t\tif first {\n\t\t\t\tagg.v = msg.agg\n\t\t\t\tfirst = false\n\t\t\t} else {\n\t\t\t\tC.blst_p1_add_or_double(&agg.v.cgo, &agg.v.cgo, &msg.agg.cgo)\n\t\t\t}\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 {\n\t\tagg.v = nil\n\t\treturn false\n\t}\n\treturn true\n}\nfunc PairingAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool,\n\tsig *P2Affine, sigGroupcheck bool, msg []byte,\n\toptional ...[]byte) int { // aug\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tr := C.blst_pairing_chk_n_aggr_pk_in_g1(&ctx[0],\n\t\tPK.asPtr(), 
C.bool(pkValidate),\n\t\tsig.asPtr(), C.bool(sigGroupcheck),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\n\treturn int(r)\n}\n\nfunc PairingMulNAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool,\n\tsig *P2Affine, sigGroupcheck bool,\n\trand *Scalar, randBits int, msg []byte,\n\toptional ...[]byte) int { // aug\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tr := C.blst_pairing_chk_n_mul_n_aggr_pk_in_g1(&ctx[0],\n\t\tPK.asPtr(), C.bool(pkValidate),\n\t\tsig.asPtr(), C.bool(sigGroupcheck),\n\t\t&rand.cgo.b[0], C.size_t(randBits),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\n\treturn int(r)\n}\n\n//\n// Serialization/Deserialization.\n//\n\n// P1 Serdes\nfunc (p1 *P1Affine) Serialize() []byte {\n\tvar out [BLST_P1_SERIALIZE_BYTES]byte\n\tC.blst_p1_affine_serialize((*C.byte)(&out[0]), &p1.cgo)\n\treturn out[:]\n}\n\nfunc (p1 *P1Affine) Deserialize(in []byte) *P1Affine {\n\tif len(in) != BLST_P1_SERIALIZE_BYTES {\n\t\treturn nil\n\t}\n\tif C.blst_p1_deserialize(&p1.cgo, (*C.byte)(&in[0])) != C.BLST_SUCCESS {\n\t\treturn nil\n\t}\n\treturn p1\n}\nfunc (p1 *P1Affine) Compress() []byte {\n\tvar out [BLST_P1_COMPRESS_BYTES]byte\n\tC.blst_p1_affine_compress((*C.byte)(&out[0]), &p1.cgo)\n\treturn out[:]\n}\n\nfunc (p1 *P1Affine) Uncompress(in []byte) *P1Affine {\n\tif len(in) != BLST_P1_COMPRESS_BYTES {\n\t\treturn nil\n\t}\n\tif C.blst_p1_uncompress(&p1.cgo, (*C.byte)(&in[0])) != C.BLST_SUCCESS {\n\t\treturn nil\n\t}\n\treturn p1\n}\n\nfunc (p1 *P1Affine) InG1() bool {\n\treturn bool(C.blst_p1_affine_in_g1(&p1.cgo))\n}\n\nfunc (*P1Affine) BatchUncompress(in [][]byte) []*P1Affine {\n\t// Allocate space for all of the resulting points. 
Later we'll save pointers\n\t// and return those so that the result could be used in other functions,\n\t// such as MultipleAggregateVerify.\n\tn := len(in)\n\tpoints := make([]P1Affine, n)\n\tpointsPtrs := make([]*P1Affine, n)\n\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next message to process by atomically\n\t// incrementing curItem, process corresponding point, and\n\t// repeat until n is exceeded. Each thread will send a result (true for\n\t// success, false for failure) into the channel when complete.\n\tresCh := make(chan bool, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif points[work].Uncompress(in[work]) == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tpointsPtrs[work] = &points[work]\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tresCh <- true\n\t\t\t} else {\n\t\t\t\tresCh <- false\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Collect the threads\n\tresult := true\n\tfor i := 0; i < numThreads; i++ {\n\t\tif !<-resCh {\n\t\t\tresult = false\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || !result {\n\t\treturn nil\n\t}\n\treturn pointsPtrs\n}\n\nfunc (p1 *P1) Serialize() []byte {\n\tvar out [BLST_P1_SERIALIZE_BYTES]byte\n\tC.blst_p1_serialize((*C.byte)(&out[0]), &p1.cgo)\n\treturn out[:]\n}\nfunc (p1 *P1) Compress() []byte {\n\tvar out [BLST_P1_COMPRESS_BYTES]byte\n\tC.blst_p1_compress((*C.byte)(&out[0]), &p1.cgo)\n\treturn out[:]\n}\n\nfunc (p1 *P1) MultAssign(scalarIf interface{}, optional ...int) *P1 {\n\tvar nbits int\n\tvar scalar *C.byte\n\tswitch val := scalarIf.(type) {\n\tcase []byte:\n\t\tscalar = (*C.byte)(&val[0])\n\t\tnbits = len(val) * 8\n\tcase *Scalar:\n\t\tscalar = &val.cgo.b[0]\n\t\tnbits = 
255\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tif len(optional) > 0 {\n\t\tnbits = optional[0]\n\t}\n\tC.blst_p1_mult(&p1.cgo, &p1.cgo, scalar, C.size_t(nbits))\n\treturn p1\n}\n\nfunc (p1 *P1) Mult(scalarIf interface{}, optional ...int) *P1 {\n\tret := *p1\n\treturn ret.MultAssign(scalarIf, optional...)\n}\n\nfunc (p1 *P1) AddAssign(pointIf interface{}) *P1 {\n\tswitch val := pointIf.(type) {\n\tcase *P1:\n\t\tC.blst_p1_add_or_double(&p1.cgo, &p1.cgo, &val.cgo)\n\tcase *P1Affine:\n\t\tC.blst_p1_add_or_double_affine(&p1.cgo, &p1.cgo, &val.cgo)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\treturn p1\n}\n\nfunc (p1 *P1) Add(pointIf interface{}) *P1 {\n\tret := *p1\n\treturn ret.AddAssign(pointIf)\n}\n\nfunc (p1 *P1) SubAssign(pointIf interface{}) *P1 {\n\tvar x *C.blst_fp\n\tvar affine C.bool\n\tswitch val := pointIf.(type) {\n\tcase *P1:\n\t\tx = &val.cgo.x\n\t\taffine = false\n\tcase *P1Affine:\n\t\tx = &val.cgo.x\n\t\taffine = true\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tC.go_p1_sub_assign(&p1.cgo, x, affine)\n\treturn p1\n}\n\nfunc (p1 *P1) Sub(pointIf interface{}) *P1 {\n\tret := *p1\n\treturn ret.SubAssign(pointIf)\n}\n\nfunc P1Generator() *P1 {\n\treturn &cgo_p1Generator\n}\n\n// 'acc += point * scalar', passing 'nil' for 'point' means \"use the\n//\n//\tgroup generator point\"\nfunc (acc *P1) MultNAccumulate(pointIf interface{}, scalarIf interface{},\n\toptional ...int) *P1 {\n\tvar x *C.blst_fp\n\tvar affine C.bool\n\tif pointIf != nil {\n\t\tswitch val := pointIf.(type) {\n\t\tcase *P1:\n\t\t\tx = &val.cgo.x\n\t\t\taffine = false\n\t\tcase *P1Affine:\n\t\t\tx = &val.cgo.x\n\t\t\taffine = true\n\t\tdefault:\n\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t}\n\t}\n\tvar nbits int\n\tvar scalar *C.byte\n\tswitch val := scalarIf.(type) {\n\tcase []byte:\n\t\tscalar = (*C.byte)(&val[0])\n\t\tnbits = len(val) * 8\n\tcase *Scalar:\n\t\tscalar = 
&val.cgo.b[0]\n\t\tnbits = 255\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tif len(optional) > 0 {\n\t\tnbits = optional[0]\n\t}\n\tC.go_p1_mult_n_acc(&acc.cgo, x, affine, scalar, C.size_t(nbits))\n\treturn acc\n}\n\n//\n// Affine\n//\n\nfunc (p *P1) ToAffine() *P1Affine {\n\tvar pa P1Affine\n\tC.blst_p1_to_affine(&pa.cgo, &p.cgo)\n\treturn &pa\n}\n\nfunc (p *P1) FromAffine(pa *P1Affine) {\n\tC.blst_p1_from_affine(&p.cgo, &pa.cgo)\n}\n\n// Hash\nfunc HashToG1(msg []byte, dst []byte,\n\toptional ...[]byte) *P1 { // aug\n\tvar q P1\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tC.blst_hash_to_g1(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\treturn &q\n}\n\nfunc EncodeToG1(msg []byte, dst []byte,\n\toptional ...[]byte) *P1 { // aug\n\tvar q P1\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tC.blst_encode_to_g1(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\treturn &q\n}\n\n//\n// Multi-point/scalar operations\n//\n\nfunc P1sToAffine(points []*P1, optional ...int) P1Affines {\n\tvar npoints int\n\tif len(optional) > 0 {\n\t\tnpoints = optional[0]\n\t} else {\n\t\tnpoints = len(points)\n\t}\n\tret := make([]P1Affine, npoints)\n\t_cgoCheckPointer := func(...interface{}) {}\n\tC.blst_p1s_to_affine(&ret[0].cgo, (**C.blst_p1)(unsafe.Pointer(&points[0])),\n\t\tC.size_t(npoints))\n\treturn ret\n}\n\nfunc (points P1s) ToAffine(optional ...P1Affines) P1Affines {\n\tnpoints := len(points)\n\tvar ret P1Affines\n\n\tif len(optional) > 0 { // used in benchmark\n\t\tret = optional[0]\n\t\tif len(ret) < npoints {\n\t\t\tpanic(\"npoints mismatch\")\n\t\t}\n\t} else {\n\t\tret = make([]P1Affine, npoints)\n\t}\n\n\tif maxProcs < 2 || npoints < 768 {\n\t\tC.go_p1slice_to_affine(&ret[0].cgo, &points[0].cgo, C.size_t(npoints))\n\t\treturn 
ret\n\t}\n\n\tnslices := (npoints + 511) / 512\n\tif nslices > maxProcs {\n\t\tnslices = maxProcs\n\t}\n\tdelta, rem := npoints/nslices+1, npoints%nslices\n\n\tvar wg sync.WaitGroup\n\twg.Add(nslices)\n\tfor x := 0; x < npoints; x += delta {\n\t\tif rem == 0 {\n\t\t\tdelta -= 1\n\t\t}\n\t\trem -= 1\n\t\tgo func(out *P1Affine, inp *P1, delta int) {\n\t\t\tC.go_p1slice_to_affine(&out.cgo, &inp.cgo, C.size_t(delta))\n\t\t\twg.Done()\n\t\t}(&ret[x], &points[x], delta)\n\t}\n\twg.Wait()\n\n\treturn ret\n}\n\n//\n// Batch addition\n//\n\nfunc P1AffinesAdd(points []*P1Affine, optional ...int) *P1 {\n\tvar npoints int\n\tif len(optional) > 0 {\n\t\tnpoints = optional[0]\n\t} else {\n\t\tnpoints = len(points)\n\t}\n\tvar ret P1\n\t_cgoCheckPointer := func(...interface{}) {}\n\tC.blst_p1s_add(&ret.cgo, (**C.blst_p1_affine)(unsafe.Pointer(&points[0])),\n\t\tC.size_t(npoints))\n\treturn &ret\n}\n\nfunc (points P1Affines) Add() *P1 {\n\tnpoints := len(points)\n\tif maxProcs < 2 || npoints < 768 {\n\t\tvar ret P1\n\t\tC.go_p1slice_add(&ret.cgo, &points[0].cgo, C.size_t(npoints))\n\t\treturn &ret\n\t}\n\n\tnslices := (npoints + 511) / 512\n\tif nslices > maxProcs {\n\t\tnslices = maxProcs\n\t}\n\tdelta, rem := npoints/nslices+1, npoints%nslices\n\n\tmsgs := make(chan P1, nslices)\n\tfor x := 0; x < npoints; x += delta {\n\t\tif rem == 0 {\n\t\t\tdelta -= 1\n\t\t}\n\t\trem -= 1\n\t\tgo func(points *P1Affine, delta int) {\n\t\t\tvar ret P1\n\t\t\tC.go_p1slice_add(&ret.cgo, &points.cgo, C.size_t(delta))\n\t\t\tmsgs <- ret\n\t\t}(&points[x], delta)\n\t}\n\n\tret := <-msgs\n\tfor i := 1; i < nslices; i++ {\n\t\tmsg := <-msgs\n\t\tC.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &msg.cgo)\n\t}\n\treturn &ret\n}\n\nfunc (points P1s) Add() *P1 {\n\treturn points.ToAffine().Add()\n}\n\n//\n// Multi-scalar multiplication\n//\n\nfunc P1AffinesMult(pointsIf interface{}, scalarsIf interface{}, nbits int) *P1 {\n\tvar npoints int\n\tswitch val := pointsIf.(type) {\n\tcase []*P1Affine:\n\t\tnpoints 
= len(val)\n\tcase []P1Affine:\n\t\tnpoints = len(val)\n\tcase P1Affines:\n\t\tnpoints = len(val)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnbytes := (nbits + 7) / 8\n\tvar scalars []*C.byte\n\tswitch val := scalarsIf.(type) {\n\tcase []byte:\n\t\tif len(val) < npoints*nbytes {\n\t\t\treturn nil\n\t\t}\n\tcase [][]byte:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tscalars = make([]*C.byte, npoints)\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = (*C.byte)(&val[i][0])\n\t\t}\n\tcase []Scalar:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tif nbits <= 248 {\n\t\t\tscalars = make([]*C.byte, npoints)\n\t\t\tfor i := range scalars {\n\t\t\t\tscalars[i] = &val[i].cgo.b[0]\n\t\t\t}\n\t\t}\n\tcase []*Scalar:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tscalars = make([]*C.byte, npoints)\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = &val[i].cgo.b[0]\n\t\t}\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnumThreads := numThreads(0)\n\n\tif numThreads < 2 {\n\t\tsz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(C.size_t(npoints))) / 8\n\t\tscratch := make([]uint64, sz)\n\n\t\tpointsBySlice := [2]*C.blst_p1_affine{nil, nil}\n\t\tvar p_points **C.blst_p1_affine\n\t\tswitch val := pointsIf.(type) {\n\t\tcase []*P1Affine:\n\t\t\tp_points = (**C.blst_p1_affine)(unsafe.Pointer(&val[0]))\n\t\tcase []P1Affine:\n\t\t\tpointsBySlice[0] = &val[0].cgo\n\t\t\tp_points = &pointsBySlice[0]\n\t\tcase P1Affines:\n\t\t\tpointsBySlice[0] = &val[0].cgo\n\t\t\tp_points = &pointsBySlice[0]\n\t\tdefault: // type is already vetted\n\t\t}\n\n\t\tscalarsBySlice := [2]*C.byte{nil, nil}\n\t\tvar p_scalars **C.byte\n\t\tswitch val := scalarsIf.(type) {\n\t\tcase []byte:\n\t\t\tscalarsBySlice[0] = (*C.byte)(&val[0])\n\t\t\tp_scalars = &scalarsBySlice[0]\n\t\tcase [][]byte:\n\t\t\tp_scalars = &scalars[0]\n\t\tcase []Scalar:\n\t\t\tif nbits > 248 {\n\t\t\t\tscalarsBySlice[0] = 
&val[0].cgo.b[0]\n\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t} else {\n\t\t\t\tp_scalars = &scalars[0]\n\t\t\t}\n\t\tcase []*Scalar:\n\t\t\tp_scalars = &scalars[0]\n\t\tdefault: // type is already vetted\n\t\t}\n\n\t\tvar ret P1\n\t\t_cgoCheckPointer := func(...interface{}) {}\n\t\tC.blst_p1s_mult_pippenger(&ret.cgo, p_points, C.size_t(npoints),\n\t\t\tp_scalars, C.size_t(nbits),\n\t\t\t(*C.limb_t)(&scratch[0]))\n\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = nil\n\t\t}\n\n\t\treturn &ret\n\t}\n\n\tif npoints < 32 {\n\t\tif numThreads > npoints {\n\t\t\tnumThreads = npoints\n\t\t}\n\n\t\tcurItem := uint32(0)\n\t\tmsgs := make(chan P1, numThreads)\n\n\t\tfor tid := 0; tid < numThreads; tid++ {\n\t\t\tgo func() {\n\t\t\t\tvar acc P1\n\n\t\t\t\tfor {\n\t\t\t\t\tworkItem := int(atomic.AddUint32(&curItem, 1) - 1)\n\t\t\t\t\tif workItem >= npoints {\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\n\t\t\t\t\tvar point *P1Affine\n\t\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\t\tcase []*P1Affine:\n\t\t\t\t\t\tpoint = val[workItem]\n\t\t\t\t\tcase []P1Affine:\n\t\t\t\t\t\tpoint = &val[workItem]\n\t\t\t\t\tcase P1Affines:\n\t\t\t\t\t\tpoint = &val[workItem]\n\t\t\t\t\tdefault: // type is already vetted\n\t\t\t\t\t}\n\n\t\t\t\t\tvar scalar *C.byte\n\t\t\t\t\tswitch val := scalarsIf.(type) {\n\t\t\t\t\tcase []byte:\n\t\t\t\t\t\tscalar = (*C.byte)(&val[workItem*nbytes])\n\t\t\t\t\tcase [][]byte:\n\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\tcase []Scalar:\n\t\t\t\t\t\tif nbits > 248 {\n\t\t\t\t\t\t\tscalar = &val[workItem].cgo.b[0]\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\t\t}\n\t\t\t\t\tcase []*Scalar:\n\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\tdefault: // type is already vetted\n\t\t\t\t\t}\n\n\t\t\t\t\tC.go_p1_mult_n_acc(&acc.cgo, &point.cgo.x, true,\n\t\t\t\t\t\tscalar, C.size_t(nbits))\n\t\t\t\t}\n\n\t\t\t\tmsgs <- acc\n\t\t\t}()\n\t\t}\n\n\t\tret := <-msgs\n\t\tfor tid := 1; tid < numThreads; tid++ {\n\t\t\tpoint := 
<-msgs\n\t\t\tC.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &point.cgo)\n\t\t}\n\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = nil\n\t\t}\n\n\t\treturn &ret\n\t}\n\n\t// this is sizeof(scratch[0])\n\tsz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(0)) / 8\n\n\tnx, ny, window := breakdown(nbits, pippenger_window_size(npoints),\n\t\tnumThreads)\n\n\t// |grid[]| holds \"coordinates\" and place for result\n\tgrid := make([]struct {\n\t\tx, dx, y, dy int\n\t\tpoint        P1\n\t}, nx*ny)\n\n\tdx := npoints / nx\n\ty := window * (ny - 1)\n\ttotal := 0\n\tfor ; total < nx; total++ {\n\t\tgrid[total].x = total * dx\n\t\tgrid[total].dx = dx\n\t\tgrid[total].y = y\n\t\tgrid[total].dy = nbits - y\n\t}\n\tgrid[total-1].dx = npoints - grid[total-1].x\n\n\tfor y > 0 {\n\t\ty -= window\n\t\tfor i := 0; i < nx; i++ {\n\t\t\tgrid[total].x = grid[i].x\n\t\t\tgrid[total].dx = grid[i].dx\n\t\t\tgrid[total].y = y\n\t\t\tgrid[total].dy = window\n\t\t\ttotal++\n\t\t}\n\t}\n\n\tif numThreads > total {\n\t\tnumThreads = total\n\t}\n\n\tmsgsCh := make(chan int, ny)\n\trowSync := make([]int32, ny) // count up to |nx|\n\tcurItem := int32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tscratch := make([]uint64, sz<<uint(window-1))\n\t\t\tpointsBySlice := [2]*C.blst_p1_affine{nil, nil}\n\t\t\tscalarsBySlice := [2]*C.byte{nil, nil}\n\t\t\t_cgoCheckPointer := func(...interface{}) {}\n\n\t\t\tfor {\n\t\t\t\tworkItem := atomic.AddInt32(&curItem, 1) - 1\n\t\t\t\tif int(workItem) >= total {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tx := grid[workItem].x\n\t\t\t\ty := grid[workItem].y\n\n\t\t\t\tvar p_points **C.blst_p1_affine\n\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\tcase []*P1Affine:\n\t\t\t\t\tp_points = (**C.blst_p1_affine)(unsafe.Pointer(&val[x]))\n\t\t\t\tcase []P1Affine:\n\t\t\t\t\tpointsBySlice[0] = &val[x].cgo\n\t\t\t\t\tp_points = &pointsBySlice[0]\n\t\t\t\tcase P1Affines:\n\t\t\t\t\tpointsBySlice[0] = &val[x].cgo\n\t\t\t\t\tp_points = 
&pointsBySlice[0]\n\t\t\t\tdefault: // type is already vetted\n\t\t\t\t}\n\n\t\t\t\tvar p_scalars **C.byte\n\t\t\t\tswitch val := scalarsIf.(type) {\n\t\t\t\tcase []byte:\n\t\t\t\t\tscalarsBySlice[0] = (*C.byte)(&val[x*nbytes])\n\t\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t\tcase [][]byte:\n\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\tcase []Scalar:\n\t\t\t\t\tif nbits > 248 {\n\t\t\t\t\t\tscalarsBySlice[0] = &val[x].cgo.b[0]\n\t\t\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t\t\t} else {\n\t\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\t\t}\n\t\t\t\tcase []*Scalar:\n\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\tdefault: // type is already vetted\n\t\t\t\t}\n\n\t\t\t\tC.blst_p1s_tile_pippenger(&grid[workItem].point.cgo,\n\t\t\t\t\tp_points, C.size_t(grid[workItem].dx),\n\t\t\t\t\tp_scalars, C.size_t(nbits),\n\t\t\t\t\t(*C.limb_t)(&scratch[0]),\n\t\t\t\t\tC.size_t(y), C.size_t(window))\n\n\t\t\t\tif atomic.AddInt32(&rowSync[y/window], 1) == int32(nx) {\n\t\t\t\t\tmsgsCh <- y // \"row\" is done\n\t\t\t\t} else {\n\t\t\t\t\truntime.Gosched() // be nice to the application\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tpointsBySlice[0] = nil\n\t\t\tscalarsBySlice[0] = nil\n\t\t}()\n\t}\n\n\tvar ret P1\n\trows := make([]bool, ny)\n\trow := 0                  // actually index in |grid[]|\n\tfor i := 0; i < ny; i++ { // we expect |ny| messages, one per \"row\"\n\t\ty := <-msgsCh\n\t\trows[y/window] = true  // mark the \"row\"\n\t\tfor grid[row].y == y { // if it's current \"row\", process it\n\t\t\tfor row < total && grid[row].y == y {\n\t\t\t\tC.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &grid[row].point.cgo)\n\t\t\t\trow++\n\t\t\t}\n\t\t\tif y == 0 {\n\t\t\t\tbreak // one can as well 'return &ret' here\n\t\t\t}\n\t\t\tfor j := 0; j < window; j++ {\n\t\t\t\tC.blst_p1_double(&ret.cgo, &ret.cgo)\n\t\t\t}\n\t\t\ty -= window\n\t\t\tif !rows[y/window] { // see if next \"row\" was marked already\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n\n\tfor i := range scalars {\n\t\tscalars[i] = nil\n\t}\n\n\treturn 
&ret\n}\n\nfunc (points P1Affines) Mult(scalarsIf interface{}, nbits int) *P1 {\n\treturn P1AffinesMult(points, scalarsIf, nbits)\n}\n\nfunc (points P1s) Mult(scalarsIf interface{}, nbits int) *P1 {\n\treturn points.ToAffine().Mult(scalarsIf, nbits)\n}\n\n//\n// Group-check\n//\n\nfunc P1AffinesValidate(pointsIf interface{}) bool {\n\tvar npoints int\n\tswitch val := pointsIf.(type) {\n\tcase []*P1Affine:\n\t\tnpoints = len(val)\n\tcase []P1Affine:\n\t\tnpoints = len(val)\n\tcase P1Affines:\n\t\tnpoints = len(val)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnumThreads := numThreads(npoints)\n\n\tif numThreads < 2 {\n\t\tfor i := 0; i < npoints; i++ {\n\t\t\tvar point *P1Affine\n\n\t\t\tswitch val := pointsIf.(type) {\n\t\t\tcase []*P1Affine:\n\t\t\t\tpoint = val[i]\n\t\t\tcase []P1Affine:\n\t\t\t\tpoint = &val[i]\n\t\t\tcase P1Affines:\n\t\t\t\tpoint = &val[i]\n\t\t\tdefault:\n\t\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t\t}\n\n\t\t\tif !C.go_p1_affine_validate(&point.cgo, true) {\n\t\t\t\treturn false\n\t\t\t}\n\t\t}\n\n\t\treturn true\n\t}\n\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\n\tvar wg sync.WaitGroup\n\twg.Add(numThreads)\n\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfor atomic.LoadInt32(&valid) != 0 {\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(npoints) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tvar point *P1Affine\n\n\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\tcase []*P1Affine:\n\t\t\t\t\tpoint = val[work]\n\t\t\t\tcase []P1Affine:\n\t\t\t\t\tpoint = &val[work]\n\t\t\t\tcase P1Affines:\n\t\t\t\t\tpoint = &val[work]\n\t\t\t\tdefault:\n\t\t\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t\t\t}\n\n\t\t\t\tif !C.go_p1_affine_validate(&point.cgo, true) {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\twg.Done()\n\t\t}()\n\t}\n\n\twg.Wait()\n\n\treturn atomic.LoadInt32(&valid) != 0\n}\n\nfunc (points 
P1Affines) Validate() bool {\n\treturn P1AffinesValidate(points)\n}\nfunc PairingAggregatePkInG2(ctx Pairing, PK *P2Affine, pkValidate bool,\n\tsig *P1Affine, sigGroupcheck bool, msg []byte,\n\toptional ...[]byte) int { // aug\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tr := C.blst_pairing_chk_n_aggr_pk_in_g2(&ctx[0],\n\t\tPK.asPtr(), C.bool(pkValidate),\n\t\tsig.asPtr(), C.bool(sigGroupcheck),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\n\treturn int(r)\n}\n\nfunc PairingMulNAggregatePkInG2(ctx Pairing, PK *P2Affine, pkValidate bool,\n\tsig *P1Affine, sigGroupcheck bool,\n\trand *Scalar, randBits int, msg []byte,\n\toptional ...[]byte) int { // aug\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tr := C.blst_pairing_chk_n_mul_n_aggr_pk_in_g2(&ctx[0],\n\t\tPK.asPtr(), C.bool(pkValidate),\n\t\tsig.asPtr(), C.bool(sigGroupcheck),\n\t\t&rand.cgo.b[0], C.size_t(randBits),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\n\treturn int(r)\n}\n\n//\n// Serialization/Deserialization.\n//\n\n// P2 Serdes\nfunc (p2 *P2Affine) Serialize() []byte {\n\tvar out [BLST_P2_SERIALIZE_BYTES]byte\n\tC.blst_p2_affine_serialize((*C.byte)(&out[0]), &p2.cgo)\n\treturn out[:]\n}\n\nfunc (p2 *P2Affine) Deserialize(in []byte) *P2Affine {\n\tif len(in) != BLST_P2_SERIALIZE_BYTES {\n\t\treturn nil\n\t}\n\tif C.blst_p2_deserialize(&p2.cgo, (*C.byte)(&in[0])) != C.BLST_SUCCESS {\n\t\treturn nil\n\t}\n\treturn p2\n}\nfunc (p2 *P2Affine) Compress() []byte {\n\tvar out [BLST_P2_COMPRESS_BYTES]byte\n\tC.blst_p2_affine_compress((*C.byte)(&out[0]), &p2.cgo)\n\treturn out[:]\n}\n\nfunc (p2 *P2Affine) Uncompress(in []byte) *P2Affine {\n\tif len(in) != BLST_P2_COMPRESS_BYTES {\n\t\treturn nil\n\t}\n\tif C.blst_p2_uncompress(&p2.cgo, (*C.byte)(&in[0])) != C.BLST_SUCCESS {\n\t\treturn nil\n\t}\n\treturn p2\n}\n\nfunc (p2 *P2Affine) InG2() bool {\n\treturn 
bool(C.blst_p2_affine_in_g2(&p2.cgo))\n}\n\nfunc (*P2Affine) BatchUncompress(in [][]byte) []*P2Affine {\n\t// Allocate space for all of the resulting points. Later we'll save pointers\n\t// and return those so that the result could be used in other functions,\n\t// such as MultipleAggregateVerify.\n\tn := len(in)\n\tpoints := make([]P2Affine, n)\n\tpointsPtrs := make([]*P2Affine, n)\n\n\tnumThreads := numThreads(n)\n\n\t// Each thread will determine next message to process by atomically\n\t// incrementing curItem, process corresponding point, and\n\t// repeat until n is exceeded. Each thread will send a result (true for\n\t// success, false for failure) into the channel when complete.\n\tresCh := make(chan bool, numThreads)\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfor atomic.LoadInt32(&valid) > 0 {\n\t\t\t\t// Get a work item\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(n) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tif points[work].Uncompress(in[work]) == nil {\n\t\t\t\t\tatomic.StoreInt32(&valid, 0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t\tpointsPtrs[work] = &points[work]\n\t\t\t}\n\t\t\tif atomic.LoadInt32(&valid) > 0 {\n\t\t\t\tresCh <- true\n\t\t\t} else {\n\t\t\t\tresCh <- false\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Collect the threads\n\tresult := true\n\tfor i := 0; i < numThreads; i++ {\n\t\tif !<-resCh {\n\t\t\tresult = false\n\t\t}\n\t}\n\tif atomic.LoadInt32(&valid) == 0 || !result {\n\t\treturn nil\n\t}\n\treturn pointsPtrs\n}\n\nfunc (p2 *P2) Serialize() []byte {\n\tvar out [BLST_P2_SERIALIZE_BYTES]byte\n\tC.blst_p2_serialize((*C.byte)(&out[0]), &p2.cgo)\n\treturn out[:]\n}\nfunc (p2 *P2) Compress() []byte {\n\tvar out [BLST_P2_COMPRESS_BYTES]byte\n\tC.blst_p2_compress((*C.byte)(&out[0]), &p2.cgo)\n\treturn out[:]\n}\n\nfunc (p2 *P2) MultAssign(scalarIf interface{}, optional ...int) *P2 {\n\tvar nbits int\n\tvar scalar *C.byte\n\tswitch val := scalarIf.(type) 
{\n\tcase []byte:\n\t\tscalar = (*C.byte)(&val[0])\n\t\tnbits = len(val) * 8\n\tcase *Scalar:\n\t\tscalar = &val.cgo.b[0]\n\t\tnbits = 255\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tif len(optional) > 0 {\n\t\tnbits = optional[0]\n\t}\n\tC.blst_p2_mult(&p2.cgo, &p2.cgo, scalar, C.size_t(nbits))\n\treturn p2\n}\n\nfunc (p2 *P2) Mult(scalarIf interface{}, optional ...int) *P2 {\n\tret := *p2\n\treturn ret.MultAssign(scalarIf, optional...)\n}\n\nfunc (p2 *P2) AddAssign(pointIf interface{}) *P2 {\n\tswitch val := pointIf.(type) {\n\tcase *P2:\n\t\tC.blst_p2_add_or_double(&p2.cgo, &p2.cgo, &val.cgo)\n\tcase *P2Affine:\n\t\tC.blst_p2_add_or_double_affine(&p2.cgo, &p2.cgo, &val.cgo)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\treturn p2\n}\n\nfunc (p2 *P2) Add(pointIf interface{}) *P2 {\n\tret := *p2\n\treturn ret.AddAssign(pointIf)\n}\n\nfunc (p2 *P2) SubAssign(pointIf interface{}) *P2 {\n\tvar x *C.blst_fp2\n\tvar affine C.bool\n\tswitch val := pointIf.(type) {\n\tcase *P2:\n\t\tx = &val.cgo.x\n\t\taffine = false\n\tcase *P2Affine:\n\t\tx = &val.cgo.x\n\t\taffine = true\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tC.go_p2_sub_assign(&p2.cgo, x, affine)\n\treturn p2\n}\n\nfunc (p2 *P2) Sub(pointIf interface{}) *P2 {\n\tret := *p2\n\treturn ret.SubAssign(pointIf)\n}\n\nfunc P2Generator() *P2 {\n\treturn &cgo_p2Generator\n}\n\n// 'acc += point * scalar', passing 'nil' for 'point' means \"use the\n//\n//\tgroup generator point\"\nfunc (acc *P2) MultNAccumulate(pointIf interface{}, scalarIf interface{},\n\toptional ...int) *P2 {\n\tvar x *C.blst_fp2\n\tvar affine C.bool\n\tif pointIf != nil {\n\t\tswitch val := pointIf.(type) {\n\t\tcase *P2:\n\t\t\tx = &val.cgo.x\n\t\t\taffine = false\n\t\tcase *P2Affine:\n\t\t\tx = &val.cgo.x\n\t\t\taffine = true\n\t\tdefault:\n\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t}\n\t}\n\tvar nbits int\n\tvar scalar *C.byte\n\tswitch val := 
scalarIf.(type) {\n\tcase []byte:\n\t\tscalar = (*C.byte)(&val[0])\n\t\tnbits = len(val) * 8\n\tcase *Scalar:\n\t\tscalar = &val.cgo.b[0]\n\t\tnbits = 255\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\tif len(optional) > 0 {\n\t\tnbits = optional[0]\n\t}\n\tC.go_p2_mult_n_acc(&acc.cgo, x, affine, scalar, C.size_t(nbits))\n\treturn acc\n}\n\n//\n// Affine\n//\n\nfunc (p *P2) ToAffine() *P2Affine {\n\tvar pa P2Affine\n\tC.blst_p2_to_affine(&pa.cgo, &p.cgo)\n\treturn &pa\n}\n\nfunc (p *P2) FromAffine(pa *P2Affine) {\n\tC.blst_p2_from_affine(&p.cgo, &pa.cgo)\n}\n\n// Hash\nfunc HashToG2(msg []byte, dst []byte,\n\toptional ...[]byte) *P2 { // aug\n\tvar q P2\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tC.blst_hash_to_g2(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\treturn &q\n}\n\nfunc EncodeToG2(msg []byte, dst []byte,\n\toptional ...[]byte) *P2 { // aug\n\tvar q P2\n\n\tvar aug []byte\n\tif len(optional) > 0 {\n\t\taug = optional[0]\n\t}\n\n\tC.blst_encode_to_g2(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)),\n\t\tptrOrNil(aug), C.size_t(len(aug)))\n\treturn &q\n}\n\n//\n// Multi-point/scalar operations\n//\n\nfunc P2sToAffine(points []*P2, optional ...int) P2Affines {\n\tvar npoints int\n\tif len(optional) > 0 {\n\t\tnpoints = optional[0]\n\t} else {\n\t\tnpoints = len(points)\n\t}\n\tret := make([]P2Affine, npoints)\n\t_cgoCheckPointer := func(...interface{}) {}\n\tC.blst_p2s_to_affine(&ret[0].cgo, (**C.blst_p2)(unsafe.Pointer(&points[0])),\n\t\tC.size_t(npoints))\n\treturn ret\n}\n\nfunc (points P2s) ToAffine(optional ...P2Affines) P2Affines {\n\tnpoints := len(points)\n\tvar ret P2Affines\n\n\tif len(optional) > 0 { // used in benchmark\n\t\tret = optional[0]\n\t\tif len(ret) < npoints {\n\t\t\tpanic(\"npoints mismatch\")\n\t\t}\n\t} else {\n\t\tret = make([]P2Affine, npoints)\n\t}\n\n\tif 
maxProcs < 2 || npoints < 768 {\n\t\tC.go_p2slice_to_affine(&ret[0].cgo, &points[0].cgo, C.size_t(npoints))\n\t\treturn ret\n\t}\n\n\tnslices := (npoints + 511) / 512\n\tif nslices > maxProcs {\n\t\tnslices = maxProcs\n\t}\n\tdelta, rem := npoints/nslices+1, npoints%nslices\n\n\tvar wg sync.WaitGroup\n\twg.Add(nslices)\n\tfor x := 0; x < npoints; x += delta {\n\t\tif rem == 0 {\n\t\t\tdelta -= 1\n\t\t}\n\t\trem -= 1\n\t\tgo func(out *P2Affine, inp *P2, delta int) {\n\t\t\tC.go_p2slice_to_affine(&out.cgo, &inp.cgo, C.size_t(delta))\n\t\t\twg.Done()\n\t\t}(&ret[x], &points[x], delta)\n\t}\n\twg.Wait()\n\n\treturn ret\n}\n\n//\n// Batch addition\n//\n\nfunc P2AffinesAdd(points []*P2Affine, optional ...int) *P2 {\n\tvar npoints int\n\tif len(optional) > 0 {\n\t\tnpoints = optional[0]\n\t} else {\n\t\tnpoints = len(points)\n\t}\n\tvar ret P2\n\t_cgoCheckPointer := func(...interface{}) {}\n\tC.blst_p2s_add(&ret.cgo, (**C.blst_p2_affine)(unsafe.Pointer(&points[0])),\n\t\tC.size_t(npoints))\n\treturn &ret\n}\n\nfunc (points P2Affines) Add() *P2 {\n\tnpoints := len(points)\n\tif maxProcs < 2 || npoints < 768 {\n\t\tvar ret P2\n\t\tC.go_p2slice_add(&ret.cgo, &points[0].cgo, C.size_t(npoints))\n\t\treturn &ret\n\t}\n\n\tnslices := (npoints + 511) / 512\n\tif nslices > maxProcs {\n\t\tnslices = maxProcs\n\t}\n\tdelta, rem := npoints/nslices+1, npoints%nslices\n\n\tmsgs := make(chan P2, nslices)\n\tfor x := 0; x < npoints; x += delta {\n\t\tif rem == 0 {\n\t\t\tdelta -= 1\n\t\t}\n\t\trem -= 1\n\t\tgo func(points *P2Affine, delta int) {\n\t\t\tvar ret P2\n\t\t\tC.go_p2slice_add(&ret.cgo, &points.cgo, C.size_t(delta))\n\t\t\tmsgs <- ret\n\t\t}(&points[x], delta)\n\t}\n\n\tret := <-msgs\n\tfor i := 1; i < nslices; i++ {\n\t\tmsg := <-msgs\n\t\tC.blst_p2_add_or_double(&ret.cgo, &ret.cgo, &msg.cgo)\n\t}\n\treturn &ret\n}\n\nfunc (points P2s) Add() *P2 {\n\treturn points.ToAffine().Add()\n}\n\n//\n// Multi-scalar multiplication\n//\n\nfunc P2AffinesMult(pointsIf interface{}, 
scalarsIf interface{}, nbits int) *P2 {\n\tvar npoints int\n\tswitch val := pointsIf.(type) {\n\tcase []*P2Affine:\n\t\tnpoints = len(val)\n\tcase []P2Affine:\n\t\tnpoints = len(val)\n\tcase P2Affines:\n\t\tnpoints = len(val)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnbytes := (nbits + 7) / 8\n\tvar scalars []*C.byte\n\tswitch val := scalarsIf.(type) {\n\tcase []byte:\n\t\tif len(val) < npoints*nbytes {\n\t\t\treturn nil\n\t\t}\n\tcase [][]byte:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tscalars = make([]*C.byte, npoints)\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = (*C.byte)(&val[i][0])\n\t\t}\n\tcase []Scalar:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tif nbits <= 248 {\n\t\t\tscalars = make([]*C.byte, npoints)\n\t\t\tfor i := range scalars {\n\t\t\t\tscalars[i] = &val[i].cgo.b[0]\n\t\t\t}\n\t\t}\n\tcase []*Scalar:\n\t\tif len(val) < npoints {\n\t\t\treturn nil\n\t\t}\n\t\tscalars = make([]*C.byte, npoints)\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = &val[i].cgo.b[0]\n\t\t}\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnumThreads := numThreads(0)\n\n\tif numThreads < 2 {\n\t\tsz := int(C.blst_p2s_mult_pippenger_scratch_sizeof(C.size_t(npoints))) / 8\n\t\tscratch := make([]uint64, sz)\n\n\t\tpointsBySlice := [2]*C.blst_p2_affine{nil, nil}\n\t\tvar p_points **C.blst_p2_affine\n\t\tswitch val := pointsIf.(type) {\n\t\tcase []*P2Affine:\n\t\t\tp_points = (**C.blst_p2_affine)(unsafe.Pointer(&val[0]))\n\t\tcase []P2Affine:\n\t\t\tpointsBySlice[0] = &val[0].cgo\n\t\t\tp_points = &pointsBySlice[0]\n\t\tcase P2Affines:\n\t\t\tpointsBySlice[0] = &val[0].cgo\n\t\t\tp_points = &pointsBySlice[0]\n\t\tdefault: // type is already vetted\n\t\t}\n\n\t\tscalarsBySlice := [2]*C.byte{nil, nil}\n\t\tvar p_scalars **C.byte\n\t\tswitch val := scalarsIf.(type) {\n\t\tcase []byte:\n\t\t\tscalarsBySlice[0] = (*C.byte)(&val[0])\n\t\t\tp_scalars = &scalarsBySlice[0]\n\t\tcase 
[][]byte:\n\t\t\tp_scalars = &scalars[0]\n\t\tcase []Scalar:\n\t\t\tif nbits > 248 {\n\t\t\t\tscalarsBySlice[0] = &val[0].cgo.b[0]\n\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t} else {\n\t\t\t\tp_scalars = &scalars[0]\n\t\t\t}\n\t\tcase []*Scalar:\n\t\t\tp_scalars = &scalars[0]\n\t\tdefault: // type is already vetted\n\t\t}\n\n\t\tvar ret P2\n\t\t_cgoCheckPointer := func(...interface{}) {}\n\t\tC.blst_p2s_mult_pippenger(&ret.cgo, p_points, C.size_t(npoints),\n\t\t\tp_scalars, C.size_t(nbits),\n\t\t\t(*C.limb_t)(&scratch[0]))\n\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = nil\n\t\t}\n\n\t\treturn &ret\n\t}\n\n\tif npoints < 32 {\n\t\tif numThreads > npoints {\n\t\t\tnumThreads = npoints\n\t\t}\n\n\t\tcurItem := uint32(0)\n\t\tmsgs := make(chan P2, numThreads)\n\n\t\tfor tid := 0; tid < numThreads; tid++ {\n\t\t\tgo func() {\n\t\t\t\tvar acc P2\n\n\t\t\t\tfor {\n\t\t\t\t\tworkItem := int(atomic.AddUint32(&curItem, 1) - 1)\n\t\t\t\t\tif workItem >= npoints {\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\n\t\t\t\t\tvar point *P2Affine\n\t\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\t\tcase []*P2Affine:\n\t\t\t\t\t\tpoint = val[workItem]\n\t\t\t\t\tcase []P2Affine:\n\t\t\t\t\t\tpoint = &val[workItem]\n\t\t\t\t\tcase P2Affines:\n\t\t\t\t\t\tpoint = &val[workItem]\n\t\t\t\t\tdefault: // type is already vetted\n\t\t\t\t\t}\n\n\t\t\t\t\tvar scalar *C.byte\n\t\t\t\t\tswitch val := scalarsIf.(type) {\n\t\t\t\t\tcase []byte:\n\t\t\t\t\t\tscalar = (*C.byte)(&val[workItem*nbytes])\n\t\t\t\t\tcase [][]byte:\n\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\tcase []Scalar:\n\t\t\t\t\t\tif nbits > 248 {\n\t\t\t\t\t\t\tscalar = &val[workItem].cgo.b[0]\n\t\t\t\t\t\t} else {\n\t\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\t\t}\n\t\t\t\t\tcase []*Scalar:\n\t\t\t\t\t\tscalar = scalars[workItem]\n\t\t\t\t\tdefault: // type is already vetted\n\t\t\t\t\t}\n\n\t\t\t\t\tC.go_p2_mult_n_acc(&acc.cgo, &point.cgo.x, true,\n\t\t\t\t\t\tscalar, C.size_t(nbits))\n\t\t\t\t}\n\n\t\t\t\tmsgs <- 
acc\n\t\t\t}()\n\t\t}\n\n\t\tret := <-msgs\n\t\tfor tid := 1; tid < numThreads; tid++ {\n\t\t\tpoint := <-msgs\n\t\t\tC.blst_p2_add_or_double(&ret.cgo, &ret.cgo, &point.cgo)\n\t\t}\n\n\t\tfor i := range scalars {\n\t\t\tscalars[i] = nil\n\t\t}\n\n\t\treturn &ret\n\t}\n\n\t// this is sizeof(scratch[0])\n\tsz := int(C.blst_p2s_mult_pippenger_scratch_sizeof(0)) / 8\n\n\tnx, ny, window := breakdown(nbits, pippenger_window_size(npoints),\n\t\tnumThreads)\n\n\t// |grid[]| holds \"coordinates\" and place for result\n\tgrid := make([]struct {\n\t\tx, dx, y, dy int\n\t\tpoint        P2\n\t}, nx*ny)\n\n\tdx := npoints / nx\n\ty := window * (ny - 1)\n\ttotal := 0\n\tfor ; total < nx; total++ {\n\t\tgrid[total].x = total * dx\n\t\tgrid[total].dx = dx\n\t\tgrid[total].y = y\n\t\tgrid[total].dy = nbits - y\n\t}\n\tgrid[total-1].dx = npoints - grid[total-1].x\n\n\tfor y > 0 {\n\t\ty -= window\n\t\tfor i := 0; i < nx; i++ {\n\t\t\tgrid[total].x = grid[i].x\n\t\t\tgrid[total].dx = grid[i].dx\n\t\t\tgrid[total].y = y\n\t\t\tgrid[total].dy = window\n\t\t\ttotal++\n\t\t}\n\t}\n\n\tif numThreads > total {\n\t\tnumThreads = total\n\t}\n\n\tmsgsCh := make(chan int, ny)\n\trowSync := make([]int32, ny) // count up to |nx|\n\tcurItem := int32(0)\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tscratch := make([]uint64, sz<<uint(window-1))\n\t\t\tpointsBySlice := [2]*C.blst_p2_affine{nil, nil}\n\t\t\tscalarsBySlice := [2]*C.byte{nil, nil}\n\t\t\t_cgoCheckPointer := func(...interface{}) {}\n\n\t\t\tfor {\n\t\t\t\tworkItem := atomic.AddInt32(&curItem, 1) - 1\n\t\t\t\tif int(workItem) >= total {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tx := grid[workItem].x\n\t\t\t\ty := grid[workItem].y\n\n\t\t\t\tvar p_points **C.blst_p2_affine\n\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\tcase []*P2Affine:\n\t\t\t\t\tp_points = (**C.blst_p2_affine)(unsafe.Pointer(&val[x]))\n\t\t\t\tcase []P2Affine:\n\t\t\t\t\tpointsBySlice[0] = &val[x].cgo\n\t\t\t\t\tp_points = 
&pointsBySlice[0]\n\t\t\t\tcase P2Affines:\n\t\t\t\t\tpointsBySlice[0] = &val[x].cgo\n\t\t\t\t\tp_points = &pointsBySlice[0]\n\t\t\t\tdefault: // type is already vetted\n\t\t\t\t}\n\n\t\t\t\tvar p_scalars **C.byte\n\t\t\t\tswitch val := scalarsIf.(type) {\n\t\t\t\tcase []byte:\n\t\t\t\t\tscalarsBySlice[0] = (*C.byte)(&val[x*nbytes])\n\t\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t\tcase [][]byte:\n\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\tcase []Scalar:\n\t\t\t\t\tif nbits > 248 {\n\t\t\t\t\t\tscalarsBySlice[0] = &val[x].cgo.b[0]\n\t\t\t\t\t\tp_scalars = &scalarsBySlice[0]\n\t\t\t\t\t} else {\n\t\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\t\t}\n\t\t\t\tcase []*Scalar:\n\t\t\t\t\tp_scalars = &scalars[x]\n\t\t\t\tdefault: // type is already vetted\n\t\t\t\t}\n\n\t\t\t\tC.blst_p2s_tile_pippenger(&grid[workItem].point.cgo,\n\t\t\t\t\tp_points, C.size_t(grid[workItem].dx),\n\t\t\t\t\tp_scalars, C.size_t(nbits),\n\t\t\t\t\t(*C.limb_t)(&scratch[0]),\n\t\t\t\t\tC.size_t(y), C.size_t(window))\n\n\t\t\t\tif atomic.AddInt32(&rowSync[y/window], 1) == int32(nx) {\n\t\t\t\t\tmsgsCh <- y // \"row\" is done\n\t\t\t\t} else {\n\t\t\t\t\truntime.Gosched() // be nice to the application\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tpointsBySlice[0] = nil\n\t\t\tscalarsBySlice[0] = nil\n\t\t}()\n\t}\n\n\tvar ret P2\n\trows := make([]bool, ny)\n\trow := 0                  // actually index in |grid[]|\n\tfor i := 0; i < ny; i++ { // we expect |ny| messages, one per \"row\"\n\t\ty := <-msgsCh\n\t\trows[y/window] = true  // mark the \"row\"\n\t\tfor grid[row].y == y { // if it's current \"row\", process it\n\t\t\tfor row < total && grid[row].y == y {\n\t\t\t\tC.blst_p2_add_or_double(&ret.cgo, &ret.cgo, &grid[row].point.cgo)\n\t\t\t\trow++\n\t\t\t}\n\t\t\tif y == 0 {\n\t\t\t\tbreak // one can as well 'return &ret' here\n\t\t\t}\n\t\t\tfor j := 0; j < window; j++ {\n\t\t\t\tC.blst_p2_double(&ret.cgo, &ret.cgo)\n\t\t\t}\n\t\t\ty -= window\n\t\t\tif !rows[y/window] { // see if next \"row\" was marked 
already\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n\n\tfor i := range scalars {\n\t\tscalars[i] = nil\n\t}\n\n\treturn &ret\n}\n\nfunc (points P2Affines) Mult(scalarsIf interface{}, nbits int) *P2 {\n\treturn P2AffinesMult(points, scalarsIf, nbits)\n}\n\nfunc (points P2s) Mult(scalarsIf interface{}, nbits int) *P2 {\n\treturn points.ToAffine().Mult(scalarsIf, nbits)\n}\n\n//\n// Group-check\n//\n\nfunc P2AffinesValidate(pointsIf interface{}) bool {\n\tvar npoints int\n\tswitch val := pointsIf.(type) {\n\tcase []*P2Affine:\n\t\tnpoints = len(val)\n\tcase []P2Affine:\n\t\tnpoints = len(val)\n\tcase P2Affines:\n\t\tnpoints = len(val)\n\tdefault:\n\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t}\n\n\tnumThreads := numThreads(npoints)\n\n\tif numThreads < 2 {\n\t\tfor i := 0; i < npoints; i++ {\n\t\t\tvar point *P2Affine\n\n\t\t\tswitch val := pointsIf.(type) {\n\t\t\tcase []*P2Affine:\n\t\t\t\tpoint = val[i]\n\t\t\tcase []P2Affine:\n\t\t\t\tpoint = &val[i]\n\t\t\tcase P2Affines:\n\t\t\t\tpoint = &val[i]\n\t\t\tdefault:\n\t\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t\t}\n\n\t\t\tif !C.go_p2_affine_validate(&point.cgo, true) {\n\t\t\t\treturn false\n\t\t\t}\n\t\t}\n\n\t\treturn true\n\t}\n\n\tvalid := int32(1)\n\tcurItem := uint32(0)\n\n\tvar wg sync.WaitGroup\n\twg.Add(numThreads)\n\n\tfor tid := 0; tid < numThreads; tid++ {\n\t\tgo func() {\n\t\t\tfor atomic.LoadInt32(&valid) != 0 {\n\t\t\t\twork := atomic.AddUint32(&curItem, 1) - 1\n\t\t\t\tif work >= uint32(npoints) {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\tvar point *P2Affine\n\n\t\t\t\tswitch val := pointsIf.(type) {\n\t\t\t\tcase []*P2Affine:\n\t\t\t\t\tpoint = val[work]\n\t\t\t\tcase []P2Affine:\n\t\t\t\t\tpoint = &val[work]\n\t\t\t\tcase P2Affines:\n\t\t\t\t\tpoint = &val[work]\n\t\t\t\tdefault:\n\t\t\t\t\tpanic(fmt.Sprintf(\"unsupported type %T\", val))\n\t\t\t\t}\n\n\t\t\t\tif !C.go_p2_affine_validate(&point.cgo, true) {\n\t\t\t\t\tatomic.StoreInt32(&valid, 
0)\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\twg.Done()\n\t\t}()\n\t}\n\n\twg.Wait()\n\n\treturn atomic.LoadInt32(&valid) != 0\n}\n\nfunc (points P2Affines) Validate() bool {\n\treturn P2AffinesValidate(points)\n}\n\n// aug [][]byte - augmentation bytes for signing (default: nil)\nfunc parseOpts(optional ...interface{}) (augSingle []byte, aug [][]byte,\n\tuseHash bool, ok bool) {\n\tuseHash = true // hash (true), encode (false)\n\n\tfor _, arg := range optional {\n\t\tswitch v := arg.(type) {\n\t\tcase []byte:\n\t\t\taugSingle = v\n\t\tcase [][]byte:\n\t\t\taug = v\n\t\tcase bool:\n\t\t\tuseHash = v\n\t\tdefault:\n\t\t\treturn nil, nil, useHash, false\n\t\t}\n\t}\n\treturn augSingle, aug, useHash, true\n}\n\n// These methods are inefficient because of cgo call overhead. For this\n// reason they should be used primarily for prototyping with a goal to\n// formulate interfaces that would process multiple scalars per cgo call.\nfunc (a *Scalar) MulAssign(b *Scalar) (*Scalar, bool) {\n\treturn a, bool(C.blst_sk_mul_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Mul(b *Scalar) (*Scalar, bool) {\n\tvar ret Scalar\n\treturn &ret, bool(C.blst_sk_mul_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) AddAssign(b *Scalar) (*Scalar, bool) {\n\treturn a, bool(C.blst_sk_add_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Add(b *Scalar) (*Scalar, bool) {\n\tvar ret Scalar\n\treturn &ret, bool(C.blst_sk_add_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) SubAssign(b *Scalar) (*Scalar, bool) {\n\treturn a, bool(C.blst_sk_sub_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Sub(b *Scalar) (*Scalar, bool) {\n\tvar ret Scalar\n\treturn &ret, bool(C.blst_sk_sub_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Inverse() *Scalar {\n\tvar ret Scalar\n\tC.blst_sk_inverse(&ret.cgo, &a.cgo)\n\treturn &ret\n}\n\n//\n// Serialization/Deserialization.\n//\n\n// Scalar serdes\nfunc (s *Scalar) Serialize() []byte {\n\tvar out 
[BLST_SCALAR_BYTES]byte\n\tC.blst_bendian_from_scalar((*C.byte)(&out[0]), &s.cgo)\n\treturn out[:]\n}\n\nfunc (s *Scalar) Deserialize(in []byte) *Scalar {\n\tif len(in) != BLST_SCALAR_BYTES ||\n\t\t!C.go_scalar_from_bendian(&s.cgo, (*C.byte)(&in[0])) {\n\t\treturn nil\n\t}\n\treturn s\n}\n\nfunc (s *Scalar) Valid() bool {\n\treturn bool(C.blst_sk_check(&s.cgo))\n}\n\nfunc (s *Scalar) HashTo(msg []byte, dst []byte) bool {\n\tret := HashToScalar(msg, dst)\n\tif ret != nil {\n\t\t*s = *ret\n\t\treturn true\n\t}\n\treturn false\n}\n\nfunc HashToScalar(msg []byte, dst []byte) *Scalar {\n\tvar ret Scalar\n\n\tif C.go_hash_to_scalar(&ret.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst))) {\n\t\treturn &ret\n\t}\n\n\treturn nil\n}\n\n//\n// LEndian\n//\n\nfunc (fr *Scalar) ToLEndian() []byte {\n\tvar arr [BLST_SCALAR_BYTES]byte\n\tC.blst_lendian_from_scalar((*C.byte)(&arr[0]), &fr.cgo)\n\treturn arr[:]\n}\n\nfunc (fp *Fp) ToLEndian() []byte {\n\tvar arr [BLST_FP_BYTES]byte\n\tC.blst_lendian_from_fp((*C.byte)(&arr[0]), &fp.cgo)\n\treturn arr[:]\n}\n\nfunc (fr *Scalar) FromLEndian(arr []byte) *Scalar {\n\tnbytes := len(arr)\n\tif nbytes < BLST_SCALAR_BYTES ||\n\t\t!C.blst_scalar_from_le_bytes(&fr.cgo, (*C.byte)(&arr[0]), C.size_t(nbytes)) {\n\t\treturn nil\n\t}\n\treturn fr\n}\n\nfunc (fp *Fp) FromLEndian(arr []byte) *Fp {\n\tif len(arr) != BLST_FP_BYTES {\n\t\treturn nil\n\t}\n\tC.blst_fp_from_lendian(&fp.cgo, (*C.byte)(&arr[0]))\n\treturn fp\n}\n\n//\n// BEndian\n//\n\nfunc (fr *Scalar) ToBEndian() []byte {\n\tvar arr [BLST_SCALAR_BYTES]byte\n\tC.blst_bendian_from_scalar((*C.byte)(&arr[0]), &fr.cgo)\n\treturn arr[:]\n}\n\nfunc (fp *Fp) ToBEndian() []byte {\n\tvar arr [BLST_FP_BYTES]byte\n\tC.blst_bendian_from_fp((*C.byte)(&arr[0]), &fp.cgo)\n\treturn arr[:]\n}\n\nfunc (fr *Scalar) FromBEndian(arr []byte) *Scalar {\n\tnbytes := len(arr)\n\tif nbytes < BLST_SCALAR_BYTES ||\n\t\t!C.blst_scalar_from_be_bytes(&fr.cgo, (*C.byte)(&arr[0]), 
C.size_t(nbytes)) {\n\t\treturn nil\n\t}\n\treturn fr\n}\n\nfunc (fp *Fp) FromBEndian(arr []byte) *Fp {\n\tif len(arr) != BLST_FP_BYTES {\n\t\treturn nil\n\t}\n\tC.blst_fp_from_bendian(&fp.cgo, (*C.byte)(&arr[0]))\n\treturn fp\n}\n\n//\n// Printing\n//\n\nfunc PrintBytes(val []byte, name string) {\n\tfmt.Printf(\"%s = %02x\\n\", name, val)\n}\n\nfunc (s *Scalar) Print(name string) {\n\tarr := s.ToBEndian()\n\tPrintBytes(arr, name)\n}\n\nfunc (p *P1Affine) Print(name string) {\n\tfmt.Printf(\"%s:\\n\", name)\n\tx := Fp{p.cgo.x}\n\tarr := x.ToBEndian()\n\tPrintBytes(arr, \"  x\")\n\ty := Fp{p.cgo.y}\n\tarr = y.ToBEndian()\n\tPrintBytes(arr, \"  y\")\n}\n\nfunc (p *P1) Print(name string) {\n\tfmt.Printf(\"%s:\\n\", name)\n\taff := p.ToAffine()\n\taff.Print(name)\n}\n\nfunc (f *Fp2) Print(name string) {\n\tfmt.Printf(\"%s:\\n\", name)\n\tvar arr [BLST_FP_BYTES]byte\n\tC.blst_bendian_from_fp((*C.byte)(&arr[0]), &f.cgo.fp[0])\n\tPrintBytes(arr[:], \"    0\")\n\tC.blst_bendian_from_fp((*C.byte)(&arr[0]), &f.cgo.fp[1])\n\tPrintBytes(arr[:], \"    1\")\n}\n\nfunc (p *P2Affine) Print(name string) {\n\tfmt.Printf(\"%s:\\n\", name)\n\tx := Fp2{p.cgo.x}\n\tx.Print(\"  x\")\n\ty := Fp2{p.cgo.y}\n\ty.Print(\"  y\")\n}\n\nfunc (p *P2) Print(name string) {\n\tfmt.Printf(\"%s:\\n\", name)\n\taff := p.ToAffine()\n\taff.Print(name)\n}\n\n//\n// Equality\n//\n\nfunc (s1 *Scalar) Equals(s2 *Scalar) bool {\n\treturn *s1 == *s2\n}\n\nfunc (e1 *Fp) Equals(e2 *Fp) bool {\n\treturn *e1 == *e2\n}\n\nfunc (e1 *Fp2) Equals(e2 *Fp2) bool {\n\treturn *e1 == *e2\n}\n\nfunc (e1 *P1Affine) Equals(e2 *P1Affine) bool {\n\treturn bool(C.blst_p1_affine_is_equal(&e1.cgo, &e2.cgo))\n}\n\nfunc (pt *P1Affine) asPtr() *C.blst_p1_affine {\n\tif pt != nil {\n\t\treturn &pt.cgo\n\t}\n\n\treturn nil\n}\n\nfunc (e1 *P1) Equals(e2 *P1) bool {\n\treturn bool(C.blst_p1_is_equal(&e1.cgo, &e2.cgo))\n}\n\nfunc (e1 *P2Affine) Equals(e2 *P2Affine) bool {\n\treturn bool(C.blst_p2_affine_is_equal(&e1.cgo, 
&e2.cgo))\n}\n\nfunc (pt *P2Affine) asPtr() *C.blst_p2_affine {\n\tif pt != nil {\n\t\treturn &pt.cgo\n\t}\n\n\treturn nil\n}\n\nfunc (e1 *P2) Equals(e2 *P2) bool {\n\treturn bool(C.blst_p2_is_equal(&e1.cgo, &e2.cgo))\n}\n\n// private thunk for testing\n\nfunc expandMessageXmd(msg []byte, dst []byte, len_in_bytes int) []byte {\n\tret := make([]byte, len_in_bytes)\n\n\tC.blst_expand_message_xmd((*C.byte)(&ret[0]), C.size_t(len(ret)),\n\t\tptrOrNil(msg), C.size_t(len(msg)),\n\t\tptrOrNil(dst), C.size_t(len(dst)))\n\treturn ret\n}\n\nfunc breakdown(nbits, window, ncpus int) (nx int, ny int, wnd int) {\n\n\tif nbits > window*ncpus { //nolint:nestif\n\t\tnx = 1\n\t\twnd = bits.Len(uint(ncpus) / 4)\n\t\tif (window + wnd) > 18 {\n\t\t\twnd = window - wnd\n\t\t} else {\n\t\t\twnd = (nbits/window + ncpus - 1) / ncpus\n\t\t\tif (nbits/(window+1)+ncpus-1)/ncpus < wnd {\n\t\t\t\twnd = window + 1\n\t\t\t} else {\n\t\t\t\twnd = window\n\t\t\t}\n\t\t}\n\t} else {\n\t\tnx = 2\n\t\twnd = window - 2\n\t\tfor (nbits/wnd+1)*nx < ncpus {\n\t\t\tnx += 1\n\t\t\twnd = window - bits.Len(3*uint(nx)/2)\n\t\t}\n\t\tnx -= 1\n\t\twnd = window - bits.Len(3*uint(nx)/2)\n\t}\n\tny = nbits/wnd + 1\n\twnd = nbits/ny + 1\n\n\treturn nx, ny, wnd\n}\n\nfunc pippenger_window_size(npoints int) int {\n\twbits := bits.Len(uint(npoints))\n\n\tif wbits > 13 {\n\t\treturn wbits - 4\n\t}\n\tif wbits > 5 {\n\t\treturn wbits - 3\n\t}\n\treturn 2\n}\n"
  },
  {
    "path": "bindings/go/blst.tgo",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\npackage blst\n\n// #cgo CFLAGS: -I${SRCDIR}/.. -I${SRCDIR}/../../build -I${SRCDIR}/../../src -D__BLST_CGO__ -fno-builtin-memcpy -fno-builtin-memset\n// #cgo amd64 CFLAGS: -D__ADX__ -mno-avx\n// // no-asm 64-bit platforms from https://go.dev/doc/install/source\n// #cgo loong64 mips64 mips64le ppc64 ppc64le riscv64 s390x CFLAGS: -D__BLST_NO_ASM__\n//\n// #include \"blst.h\"\n//\n// #if defined(__x86_64__) && (defined(__unix__) || defined(__APPLE__))\n// # include <signal.h>\n// # include <unistd.h>\n// static void handler(int signum)\n// {   ssize_t n = write(2, \"Caught SIGILL in blst_cgo_init, \"\n//                          \"consult <blst>/bindings/go/README.md.\\n\", 70);\n//     _exit(128+SIGILL);\n//     (void)n;\n// }\n// __attribute__((constructor)) static void blst_cgo_init()\n// {   blst_fp temp = { 0 };\n//     struct sigaction act = { handler }, oact;\n//     sigaction(SIGILL, &act, &oact);\n//     blst_fp_sqr(&temp, &temp);\n//     sigaction(SIGILL, &oact, NULL);\n// }\n// #endif\n//\n// static void go_pairing_init(blst_pairing *new_ctx, bool hash_or_encode,\n//                             const byte *DST, size_t DST_len)\n// {   if (DST != NULL) {\n//         byte *dst = (byte*)new_ctx + blst_pairing_sizeof();\n//         for(size_t i = 0; i < DST_len; i++) dst[i] = DST[i];\n//         DST = dst;\n//     }\n//     blst_pairing_init(new_ctx, hash_or_encode, DST, DST_len);\n// }\n// static void go_pairing_as_fp12(blst_fp12 *pt, blst_pairing *ctx)\n// {   *pt = *blst_pairing_as_fp12(ctx);   }\n//\n// static void go_p1slice_to_affine(blst_p1_affine dst[],\n//                                  const blst_p1 points[], size_t npoints)\n// {   const blst_p1 *ppoints[2] = { points, NULL };\n//     blst_p1s_to_affine(dst, ppoints, npoints);\n// }\n// static void go_p1slice_add(blst_p1 *dst, 
const blst_p1_affine points[],\n//                                          size_t npoints)\n// {   const blst_p1_affine *ppoints[2] = { points, NULL };\n//     blst_p1s_add(dst, ppoints, npoints);\n// }\n// static void go_p2slice_to_affine(blst_p2_affine dst[],\n//                                  const blst_p2 points[], size_t npoints)\n// {   const blst_p2 *ppoints[2] = { points, NULL };\n//     blst_p2s_to_affine(dst, ppoints, npoints);\n// }\n// static void go_p2slice_add(blst_p2 *dst, const blst_p2_affine points[],\n//                                          size_t npoints)\n// {   const blst_p2_affine *ppoints[2] = { points, NULL };\n//     blst_p2s_add(dst, ppoints, npoints);\n// }\n//\n// static void go_p1_mult_n_acc(blst_p1 *acc, const blst_fp *x, bool affine,\n//                                            const byte *scalar, size_t nbits)\n// {   blst_p1 m[1];\n//     const void *p = x;\n//     if (p == NULL)\n//         p = blst_p1_generator();\n//     else if (affine)\n//         blst_p1_from_affine(m, p), p = m;\n//     blst_p1_mult(m, p, scalar, nbits);\n//     blst_p1_add_or_double(acc, acc, m);\n// }\n// static void go_p2_mult_n_acc(blst_p2 *acc, const blst_fp2 *x, bool affine,\n//                                            const byte *scalar, size_t nbits)\n// {   blst_p2 m[1];\n//     const void *p = x;\n//     if (p == NULL)\n//         p = blst_p2_generator();\n//     else if (affine)\n//         blst_p2_from_affine(m, p), p = m;\n//     blst_p2_mult(m, p, scalar, nbits);\n//     blst_p2_add_or_double(acc, acc, m);\n// }\n//\n// static void go_p1_sub_assign(blst_p1 *a, const blst_fp *x, bool affine)\n// {   blst_p1 minus_b;\n//     if (affine)\n//         blst_p1_from_affine(&minus_b, (const blst_p1_affine*)x);\n//     else\n//         minus_b = *(const blst_p1*)x;\n//     blst_p1_cneg(&minus_b, 1);\n//     blst_p1_add_or_double(a, a, &minus_b);\n// }\n//\n// static void go_p2_sub_assign(blst_p2 *a, const blst_fp2 *x, bool affine)\n// {   
blst_p2 minus_b;\n//     if (affine)\n//         blst_p2_from_affine(&minus_b, (const blst_p2_affine*)x);\n//     else\n//         minus_b = *(const blst_p2*)x;\n//     blst_p2_cneg(&minus_b, 1);\n//     blst_p2_add_or_double(a, a, &minus_b);\n// }\n//\n// static bool go_scalar_from_bendian(blst_scalar *ret, const byte *in)\n// {   blst_scalar_from_bendian(ret, in);\n//     return blst_sk_check(ret);\n// }\n// static bool go_hash_to_scalar(blst_scalar *ret,\n//                               const byte *msg, size_t msg_len,\n//                               const byte *DST, size_t DST_len)\n// {   byte elem[48];\n//     blst_expand_message_xmd(elem, sizeof(elem), msg, msg_len, DST, DST_len);\n//     return blst_scalar_from_be_bytes(ret, elem, sizeof(elem));\n// }\n// static void go_miller_loop_n(blst_fp12 *dst, const blst_p2_affine Q[],\n//                                              const blst_p1_affine P[],\n//                                              size_t npoints, bool acc)\n// {   const blst_p2_affine *Qs[2] = { Q, NULL };\n//     const blst_p1_affine *Ps[2] = { P, NULL };\n//     if (acc) {\n//         blst_fp12 tmp;\n//         blst_miller_loop_n(&tmp, Qs, Ps, npoints);\n//         blst_fp12_mul(dst, dst, &tmp);\n//     } else {\n//         blst_miller_loop_n(dst, Qs, Ps, npoints);\n//     }\n// }\n// static void go_fp12slice_mul(blst_fp12 *dst, const blst_fp12 in[], size_t n)\n// {   size_t i;\n//     blst_fp12_mul(dst, &in[0], &in[1]);\n//     for (i = 2; i < n; i++)\n//         blst_fp12_mul(dst, dst, &in[i]);\n// }\n// static bool go_p1_affine_validate(const blst_p1_affine *p, bool infcheck)\n// {   if (infcheck && blst_p1_affine_is_inf(p))\n//         return 0;\n//     return blst_p1_affine_in_g1(p);\n// }\n// static bool go_p2_affine_validate(const blst_p2_affine *p, bool infcheck)\n// {   if (infcheck && blst_p2_affine_is_inf(p))\n//         return 0;\n//     return blst_p2_affine_in_g2(p);\n// }\nimport \"C\"\n\nimport \"runtime\"\n\nconst 
BLST_SCALAR_BYTES = 256 / 8\nconst BLST_FP_BYTES = 384 / 8\nconst BLST_P1_COMPRESS_BYTES = BLST_FP_BYTES\nconst BLST_P1_SERIALIZE_BYTES = BLST_FP_BYTES * 2\nconst BLST_P2_COMPRESS_BYTES = BLST_FP_BYTES * 2\nconst BLST_P2_SERIALIZE_BYTES = BLST_FP_BYTES * 4\n\ntype Scalar struct{ cgo C.blst_scalar }\ntype Fp struct{ cgo C.blst_fp }\ntype Fp2 struct{ cgo C.blst_fp2 }\ntype Fp6 = C.blst_fp6\ntype Fp12 struct{ cgo C.blst_fp12 }\ntype P1 struct{ cgo C.blst_p1 }\ntype P2 struct{ cgo C.blst_p2 }\ntype P1Affine struct{ cgo C.blst_p1_affine }\ntype P2Affine struct{ cgo C.blst_p2_affine }\ntype Message = []byte\ntype Pairing = []C.blst_pairing\ntype SecretKey = Scalar\ntype P1s []P1\ntype P2s []P2\ntype P1Affines []P1Affine\ntype P2Affines []P2Affine\n\n//\n// Configuration\n//\n\nvar maxProcs = initMaxProcs()\n\nfunc initMaxProcs() int {\n    maxProcs := runtime.GOMAXPROCS(0)\n    var version float32\n    _, err := fmt.Sscanf(runtime.Version(), \"go%f\", &version)\n    if err != nil || version < 1.14 {\n        // be cooperative and leave one processor for the application\n        maxProcs -= 1\n    }\n    if maxProcs <= 0 {\n        maxProcs = 1\n    }\n    return maxProcs\n}\n\nfunc SetMaxProcs(procs int) {\n    if procs <= 0 {\n        procs = 1\n    }\n    maxProcs = procs\n}\n\nfunc numThreads(maxThreads int) int {\n    numThreads := maxProcs\n\n    // take into consideration the possibility that application reduced\n    // GOMAXPROCS after |maxProcs| was initialized\n    numProcs := runtime.GOMAXPROCS(0)\n    if maxProcs > numProcs {\n        numThreads = numProcs\n    }\n\n    if maxThreads > 0 && numThreads > maxThreads {\n        return maxThreads\n    }\n    return numThreads\n}\n\nvar cgo_pairingSizeOf = C.blst_pairing_sizeof()\nvar cgo_p1Generator = P1{*C.blst_p1_generator()}\nvar cgo_p2Generator = P2{*C.blst_p2_generator()}\nvar cgo_fp12One = Fp12{*C.blst_fp12_one()}\n\n//\n// Secret key\n//\nfunc (sk *SecretKey) Zeroize() {\n    var zero SecretKey\n    *sk = 
zero\n}\n\nfunc KeyGen(ikm []byte, optional ...[]byte) *SecretKey {\n    var sk SecretKey\n    var info []byte\n    if len(optional) > 0 {\n        info = optional[0]\n    }\n    if len(ikm) < 32 {\n        return nil\n    }\n    C.blst_keygen(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n                           ptrOrNil(info), C.size_t(len(info)))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\nfunc KeyGenV3(ikm []byte, optional ...[]byte) *SecretKey {\n    if len(ikm) < 32 {\n        return nil\n    }\n    var sk SecretKey\n    var info []byte\n    if len(optional) > 0 {\n        info = optional[0]\n    }\n    C.blst_keygen_v3(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n                              ptrOrNil(info), C.size_t(len(info)))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\nfunc KeyGenV45(ikm []byte, salt []byte, optional ...[]byte) *SecretKey {\n    if len(ikm) < 32 {\n        return nil\n    }\n    var sk SecretKey\n    var info []byte\n    if len(optional) > 0 {\n        info = optional[0]\n    }\n    C.blst_keygen_v4_5(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n                                (*C.byte)(&salt[0]), C.size_t(len(salt)),\n                                ptrOrNil(info), C.size_t(len(info)))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\nfunc KeyGenV5(ikm []byte, salt []byte, optional ...[]byte) *SecretKey {\n    if len(ikm) < 32 {\n        return nil\n    }\n    var sk SecretKey\n    var info 
[]byte\n    if len(optional) > 0 {\n        info = optional[0]\n    }\n    saltLen := len(salt)\n    if saltLen == 0 {\n        salt = []byte{0}\n    }\n    C.blst_keygen_v5(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)),\n                              (*C.byte)(&salt[0]), C.size_t(saltLen),\n                              ptrOrNil(info), C.size_t(len(info)))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\nfunc DeriveMasterEip2333(ikm []byte) *SecretKey {\n    if len(ikm) < 32 {\n        return nil\n    }\n    var sk SecretKey\n    C.blst_derive_master_eip2333(&sk.cgo, (*C.byte)(&ikm[0]), C.size_t(len(ikm)))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\nfunc (master *SecretKey) DeriveChildEip2333(child_index uint32) *SecretKey {\n    var sk SecretKey\n    C.blst_derive_child_eip2333(&sk.cgo, &master.cgo, C.uint(child_index))\n    // Postponing secret key zeroing till garbage collection can be too\n    // late to be effective, but every little bit helps...\n    runtime.SetFinalizer(&sk, func(sk *SecretKey) { sk.Zeroize() })\n    return &sk\n}\n\n//\n// Pairing\n//\nfunc pairingSizeOf(DST_len C.size_t) int {\n    return int((cgo_pairingSizeOf + DST_len + 7) / 8)\n}\n\nfunc PairingCtx(hash_or_encode bool, DST []byte) Pairing {\n    DST_len := C.size_t(len(DST))\n    ctx := make([]C.blst_pairing, pairingSizeOf(DST_len))\n    C.go_pairing_init(&ctx[0], C.bool(hash_or_encode), ptrOrNil(DST), DST_len)\n    return ctx\n}\n\nfunc PairingCommit(ctx Pairing) {\n    C.blst_pairing_commit(&ctx[0])\n}\n\nfunc PairingMerge(ctx Pairing, ctx1 Pairing) int {\n    r := C.blst_pairing_merge(&ctx[0], &ctx1[0])\n    return 
int(r)\n}\n\nfunc PairingFinalVerify(ctx Pairing, optional ...*Fp12) bool {\n    var gtsig *Fp12\n    if len(optional) > 0 {\n        gtsig = optional[0]\n    }\n    return bool(C.blst_pairing_finalverify(&ctx[0], gtsig.asPtr()))\n}\n\nfunc PairingRawAggregate(ctx Pairing, q *P2Affine, p *P1Affine) {\n    C.blst_pairing_raw_aggregate(&ctx[0], &q.cgo, &p.cgo)\n}\n\nfunc PairingAsFp12(ctx Pairing) *Fp12 {\n    var pt Fp12\n    C.go_pairing_as_fp12(&pt.cgo, &ctx[0])\n    return &pt\n}\n\nfunc Fp12One() Fp12 {\n    return cgo_fp12One\n}\n\nfunc Fp12FinalVerify(pt1 *Fp12, pt2 *Fp12) bool {\n    return bool(C.blst_fp12_finalverify(&pt1.cgo, &pt2.cgo))\n}\n\nfunc Fp12MillerLoop(q *P2Affine, p *P1Affine) *Fp12 {\n    var pt Fp12\n    C.blst_miller_loop(&pt.cgo, &q.cgo, &p.cgo)\n    return &pt\n}\n\nfunc Fp12MillerLoopN(qs []P2Affine, ps []P1Affine) *Fp12 {\n    if len(qs) != len(ps) || len(qs) == 0 {\n        panic(\"inputs' lengths mismatch\")\n    }\n\n    nElems := uint32(len(qs))\n    nThreads := uint32(maxProcs)\n\n    if nThreads == 1 || nElems == 1 {\n        var pt Fp12\n        C.go_miller_loop_n(&pt.cgo, &qs[0].cgo, &ps[0].cgo, C.size_t(nElems), false)\n        return &pt\n    }\n\n    stride := (nElems + nThreads - 1) / nThreads\n    if stride > 16 {\n        stride = 16\n    }\n\n    strides := (nElems + stride - 1) / stride\n    if nThreads > strides {\n        nThreads = strides\n    }\n\n    msgsCh := make(chan Fp12, nThreads)\n    curElem := uint32(0)\n\n    for tid := uint32(0); tid < nThreads; tid++ {\n        go func() {\n            acc := Fp12One()\n            first := true\n            for {\n                work := atomic.AddUint32(&curElem, stride) - stride\n                if work >= nElems {\n                    break\n                }\n                n := nElems - work\n                if n > stride {\n                    n = stride\n                }\n                C.go_miller_loop_n(&acc.cgo, &qs[work].cgo, &ps[work].cgo, C.size_t(n),\n    
                               C.bool(!first))\n                first = false\n            }\n            msgsCh <- acc\n        }()\n    }\n\n    var ret = make([]Fp12, nThreads);\n    for i := range(ret) {\n        ret[i] = <- msgsCh\n    }\n\n    var pt Fp12\n    C.go_fp12slice_mul(&pt.cgo, &ret[0].cgo, C.size_t(nThreads))\n    return &pt\n}\n\nfunc (pt *Fp12) MulAssign(p *Fp12) {\n    C.blst_fp12_mul(&pt.cgo, &pt.cgo, &p.cgo)\n}\n\nfunc (pt *Fp12) FinalExp() {\n    C.blst_final_exp(&pt.cgo, &pt.cgo)\n}\n\nfunc (pt *Fp12) InGroup() bool {\n    return bool(C.blst_fp12_in_group(&pt.cgo))\n}\n\nfunc (pt *Fp12) ToBendian() []byte {\n    var out [BLST_FP_BYTES*12]byte\n    C.blst_bendian_from_fp12((*C.byte)(&out[0]), &pt.cgo)\n    return out[:]\n}\n\nfunc (pt1 *Fp12) Equals(pt2 *Fp12) bool {\n    return *pt1 == *pt2\n}\n\nfunc (pt *Fp12) asPtr() *C.blst_fp12 {\n    if (pt != nil) {\n        return &pt.cgo\n    }\n\n    return nil\n}\n\nfunc ptrOrNil(bytes []byte) *C.byte {\n    var ptr *C.byte\n    if len(bytes) > 0 {\n        ptr = (*C.byte)(&bytes[0])\n    }\n    return ptr\n}\n"
  },
  {
    "path": "bindings/go/blst_htoc_test.go",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\npackage blst\n\nimport (\n\t\"bytes\"\n\t\"encoding/hex\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"os\"\n\t\"strconv\"\n\t\"strings\"\n\t\"testing\"\n)\n\nfunc decodeP1(m map[string]interface{}) *P1Affine {\n\tx, err := hex.DecodeString(m[\"x\"].(string)[2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\ty, err := hex.DecodeString(m[\"y\"].(string)[2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\tvar p1 P1Affine\n\tp1.Deserialize(append(x, y...))\n\treturn &p1\n}\n\nfunc readAll(file *os.File) ([]byte, error) {\n\tdefer file.Close()\n\n\tstat, err := file.Stat()\n\tif err != nil {\n\t\treturn nil, err //nolint:wrapcheck\n\t}\n\n\tbuf := make([]byte, stat.Size())\n\ttotal := 0\n\tfor total < len(buf) {\n\t\tread, err := file.Read(buf[total:])\n\t\tif err != nil {\n\t\t\treturn nil, err //nolint:wrapcheck\n\t\t}\n\t\ttotal += read\n\t}\n\n\treturn buf, nil\n}\n\nfunc jsonG1HashToCurve(t *testing.T, fname string) {\n\tt.Helper()\n\tvfile, err := os.Open(fname)\n\tif err != nil {\n\t\tt.Skipf(\"%.16s... 
not found\", fname)\n\t}\n\tbuf, err := readAll(vfile)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tvar vectors map[string]interface{}\n\terr = json.Unmarshal(buf, &vectors)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tdst := []byte(vectors[\"dst\"].(string))\n\thash_or_encode := vectors[\"randomOracle\"].(bool)\n\n\tvectorsArr, ok := vectors[\"vectors\"].([]interface{})\n\tif !ok {\n\t\tt.Error(\"Could not cast vectors to an array\")\n\t}\n\n\tfor _, v := range vectorsArr {\n\t\ttestMap, ok := v.(map[string]interface{})\n\t\tif !ok {\n\t\t\tt.Error(\"Could not cast vector to map\")\n\t\t}\n\n\t\tmsg := []byte(testMap[\"msg\"].(string))\n\t\tp1Expected := decodeP1(testMap[\"P\"].(map[string]interface{}))\n\t\tvar p1Hashed *P1Affine\n\t\tif hash_or_encode {\n\t\t\tp1Hashed = HashToG1(msg, dst).ToAffine()\n\t\t} else {\n\t\t\tp1Hashed = EncodeToG1(msg, dst).ToAffine()\n\t\t}\n\n\t\tif !p1Hashed.Equals(p1Expected) {\n\t\t\tt.Error(\"hashed != expected\")\n\t\t}\n\t}\n}\n\nfunc TestG1HashToCurve(t *testing.T) {\n\tt.Parallel()\n\tjsonG1HashToCurve(t, \"../vectors/hash_to_curve/BLS12381G1_XMD_SHA-256_SSWU_RO_.json\")\n\tjsonG1HashToCurve(t, \"../vectors/hash_to_curve/BLS12381G1_XMD_SHA-256_SSWU_NU_.json\")\n}\n\nfunc decodeP2(m map[string]interface{}) *P2Affine {\n\txArr := strings.Split(m[\"x\"].(string), \",\")\n\tx0, err := hex.DecodeString(xArr[0][2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\tx1, err := hex.DecodeString(xArr[1][2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\tyArr := strings.Split(m[\"y\"].(string), \",\")\n\ty0, err := hex.DecodeString(yArr[0][2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\ty1, err := hex.DecodeString(yArr[1][2:])\n\tif err != nil {\n\t\tfmt.Println(err)\n\t\treturn nil\n\t}\n\tvar p2 P2Affine\n\tp2.Deserialize(append(x1, append(x0, append(y1, y0...)...)...))\n\treturn &p2\n}\n\nfunc jsonG2HashToCurve(t *testing.T, fname string) 
{\n\tt.Helper()\n\tvfile, err := os.Open(fname)\n\tif err != nil {\n\t\tt.Skipf(\"%.16s... not found\", fname)\n\t}\n\tbuf, err := readAll(vfile)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tvar vectors map[string]interface{}\n\terr = json.Unmarshal(buf, &vectors)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tdst := []byte(vectors[\"dst\"].(string))\n\thash_or_encode := vectors[\"randomOracle\"].(bool)\n\n\tvectorsArr, ok := vectors[\"vectors\"].([]interface{})\n\tif !ok {\n\t\tt.Error(\"Could not cast vectors to an array\")\n\t}\n\n\tfor _, v := range vectorsArr {\n\t\ttestMap, ok := v.(map[string]interface{})\n\t\tif !ok {\n\t\t\tt.Error(\"Could not cast vector to map\")\n\t\t}\n\n\t\tmsg := []byte(testMap[\"msg\"].(string))\n\t\tp2Expected := decodeP2(testMap[\"P\"].(map[string]interface{}))\n\t\tvar p2Hashed *P2Affine\n\t\tif hash_or_encode {\n\t\t\tp2Hashed = HashToG2(msg, dst).ToAffine()\n\t\t} else {\n\t\t\tp2Hashed = EncodeToG2(msg, dst).ToAffine()\n\t\t}\n\n\t\tif !p2Hashed.Equals(p2Expected) {\n\t\t\tt.Error(\"hashed != expected\")\n\t\t}\n\t}\n}\n\nfunc TestG2HashToCurve(t *testing.T) {\n\tt.Parallel()\n\tjsonG2HashToCurve(t, \"../vectors/hash_to_curve/BLS12381G2_XMD_SHA-256_SSWU_RO_.json\")\n\tjsonG2HashToCurve(t, \"../vectors/hash_to_curve/BLS12381G2_XMD_SHA-256_SSWU_NU_.json\")\n}\n\nfunc jsonExpandMessageXmd(t *testing.T, fname string) {\n\tt.Helper()\n\tvfile, err := os.Open(fname)\n\tif err != nil {\n\t\tt.Skipf(\"%.16s... 
not found\", fname)\n\t}\n\tbuf, err := readAll(vfile)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tvar vectors map[string]interface{}\n\terr = json.Unmarshal(buf, &vectors)\n\tif err != nil {\n\t\tt.Error(err.Error())\n\t}\n\n\tDST := []byte(vectors[\"DST\"].(string))\n\n\ttests, ok := vectors[\"tests\"].([]interface{})\n\tif !ok {\n\t\tt.Error(\"Could not cast 'tests' to an array\")\n\t}\n\n\tfor _, v := range tests {\n\t\ttest, ok := v.(map[string]interface{})\n\t\tif !ok {\n\t\t\tt.Error(\"Could not map 'tests[]' element\")\n\t\t}\n\n\t\tlen_in_bytes, err := strconv.ParseInt(test[\"len_in_bytes\"].(string), 0, 0)\n\t\tif err != nil {\n\t\t\tt.Error(err.Error())\n\t\t}\n\t\tmsg := []byte(test[\"msg\"].(string))\n\t\texpected, err := hex.DecodeString(test[\"uniform_bytes\"].(string))\n\t\tif err != nil {\n\t\t\tt.Error(err.Error())\n\t\t}\n\n\t\thashed := expandMessageXmd(msg, DST, int(len_in_bytes))\n\t\tif !bytes.Equal(hashed, expected) {\n\t\t\tt.Error(\"hashed != expected\")\n\t\t}\n\t}\n}\n\nfunc TestExpandMessageXmd(t *testing.T) {\n\tt.Parallel()\n\tjsonExpandMessageXmd(t, \"../vectors/hash_to_curve/expand_message_xmd_SHA256_256.json\")\n\tjsonExpandMessageXmd(t, \"../vectors/hash_to_curve/expand_message_xmd_SHA256_38.json\")\n}\n"
  },
  {
    "path": "bindings/go/blst_miller_loop_test.go",
    "content": "package blst\n\nimport (\n    \"crypto/rand\"\n    \"testing\"\n)\n\nfunc TestMillerLoopN(t *testing.T) {\n    t.Parallel()\n    const npoints = 97\n    scalars := make([]byte, npoints*8)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        t.Error(err.Error())\n        return\n    }\n\n    p1s := make([]P1, npoints)\n    p2s := make([]P2, npoints)\n    g1 := P1Generator()\n    g2 := P2Generator()\n    for i := range p1s {\n        p1s[i] = *g1.Mult(scalars[i*8:i*8+4], 32)\n        p2s[i] = *g2.Mult(scalars[i*8+4:i*8+8], 32)\n    }\n\n    ps := P1s(p1s).ToAffine()\n    qs := P2s(p2s).ToAffine()\n\n    naive := Fp12One()\n    for i := range p1s {\n        naive.MulAssign(Fp12MillerLoop(&qs[i], &ps[i]))\n    }\n\n    if !naive.Equals(Fp12MillerLoopN(qs, ps)) {\n        t.Error(\"failed self-consistency Fp12MillerLoopN test\")\n    }\n}\n"
  },
  {
    "path": "bindings/go/blst_minpk.tgo",
    "content": "\nimport (\n    \"runtime\"\n    \"sync\"\n    \"sync/atomic\"\n)\n\n//\n// PublicKey\n//\n\nfunc (pk *P1Affine) From(s *Scalar) *P1Affine {\n    C.blst_sk_to_pk2_in_g1(nil, &pk.cgo, &s.cgo)\n    return pk\n}\n\nfunc (pk *P1Affine) KeyValidate() bool {\n    return bool(C.go_p1_affine_validate(&pk.cgo, true))\n}\n\n// sigInfcheck, check for infinity, is a way to avoid going\n// into resource-consuming verification. Passing 'false' is\n// always cryptographically safe, but application might want\n// to guard against obviously bogus individual[!] signatures.\nfunc (sig *P2Affine) SigValidate(sigInfcheck bool) bool {\n    return bool(C.go_p2_affine_validate(&sig.cgo, C.bool(sigInfcheck)))\n}\n\n//\n// Sign\n//\n\nfunc (sig *P2Affine) Sign(sk *SecretKey, msg []byte, dst []byte,\n        optional ...interface{}) *P2Affine {\n    augSingle, aug, useHash, ok := parseOpts(optional...)\n    if !ok || len(aug) != 0 {\n        return nil\n    }\n\n    var q *P2\n    if useHash {\n        q = HashToG2(msg, dst, augSingle)\n    } else {\n        q = EncodeToG2(msg, dst, augSingle)\n    }\n    C.blst_sign_pk2_in_g1(nil, &sig.cgo, &q.cgo, &sk.cgo)\n    return sig\n}\n\n//\n// Signature\n//\n\n// Functions to return a signature and public key+augmentation tuple.\n// This enables point decompression (if needed) to happen in parallel.\ntype sigGetterP2 func() *P2Affine\ntype pkGetterP1 func(i uint32, temp *P1Affine) (*P1Affine, []byte)\n\n// Single verify with decompressed pk\nfunc (sig *P2Affine) Verify(sigGroupcheck bool, pk *P1Affine, pkValidate bool,\n        msg Message, dst []byte,\n        optional ...interface{}) bool { // useHash bool, aug []byte\n\n    aug, _, useHash, ok := parseOpts(optional...)\n    if !ok {\n        return false\n    }\n    return sig.AggregateVerify(sigGroupcheck, []*P1Affine{pk}, pkValidate,\n                               []Message{msg}, dst, useHash, [][]byte{aug})\n}\n\n// Single verify with compressed pk\n// Uses a dummy signature 
to get the correct type\nfunc (dummy *P2Affine) VerifyCompressed(sig []byte, sigGroupcheck bool,\n        pk []byte, pkValidate bool, msg Message, dst []byte,\n        optional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n    return dummy.AggregateVerifyCompressed(sig, sigGroupcheck,\n                                           [][]byte{pk}, pkValidate,\n                                           []Message{msg}, dst, optional...)\n}\n\n// Aggregate verify with uncompressed signature and public keys\n// Note that checking message uniqueness, if required, is left to the user.\n// Not all signature schemes require it and this keeps the binding minimal\n// and fast. Refer to the Uniq function for one method method of performing\n// this check.\nfunc (sig *P2Affine) AggregateVerify(sigGroupcheck bool,\n        pks []*P1Affine, pksVerify bool, msgs []Message, dst []byte,\n        optional ...interface{}) bool { // useHash bool, augs [][]byte\n\n    // sanity checks and argument parsing\n    n := len(pks)\n    if n == 0 || len(msgs) != n {\n        return false\n    }\n    _, augs, useHash, ok := parseOpts(optional...)\n    useAugs := len(augs) != 0\n    if !ok || (useAugs && len(augs) != n) {\n        return false\n    }\n\n    sigFn := func() *P2Affine {\n        return sig\n    }\n\n    pkFn := func(i uint32, _ *P1Affine) (*P1Affine, []byte) {\n        if useAugs {\n            return pks[i], augs[i]\n        }\n        return pks[i], nil\n    }\n\n    return coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify,\n                                     msgs, dst, useHash)\n}\n\n// Aggregate verify with compressed signature and public keys\n// Uses a dummy signature to get the correct type\nfunc (*P2Affine) AggregateVerifyCompressed(sig []byte, sigGroupcheck bool,\n        pks [][]byte, pksVerify bool, msgs []Message, dst []byte,\n        optional ...bool) bool { // useHash bool, usePksAsAugs bool\n\n    // sanity checks and argument parsing\n    if 
len(pks) != len(msgs) {\n        return false\n    }\n    useHash := true\n    if len(optional) > 0 {\n        useHash = optional[0]\n    }\n    usePksAsAugs := false\n    if len(optional) > 1 {\n        usePksAsAugs = optional[1]\n    }\n\n    sigFn := func() *P2Affine {\n        sigP := new(P2Affine)\n        if sigP.Uncompress(sig) == nil {\n            return nil\n        }\n        return sigP\n    }\n    pkFn := func(i uint32, pk *P1Affine) (*P1Affine, []byte) {\n        bytes := pks[i]\n        if len(bytes) == BLST_P1_SERIALIZE_BYTES && (bytes[0] & 0x80) == 0 {\n            // Not compressed\n            if pk.Deserialize(bytes) == nil {\n                return nil, nil\n            }\n        } else if len(bytes) == BLST_P1_COMPRESS_BYTES && (bytes[0] & 0x80) != 0 {\n            if pk.Uncompress(bytes) == nil {\n                return nil, nil\n            }\n        } else {\n            return nil, nil\n        }\n        if usePksAsAugs {\n            return pk, bytes\n        }\n        return pk, nil\n    }\n    return coreAggregateVerifyPkInG1(sigFn, sigGroupcheck, pkFn, pksVerify,\n                                     msgs, dst, useHash)\n}\n\nfunc coreAggregateVerifyPkInG1(sigFn sigGetterP2, sigGroupcheck bool,\n        pkFn pkGetterP1, pkValidate bool, msgs []Message, dst []byte,\n        optional ...bool) bool { // useHash\n\n    n := len(msgs)\n    if n == 0 {\n        return false\n    }\n\n    useHash := true\n    if len(optional) > 0 {\n        useHash = optional[0]\n    }\n\n    numCores := runtime.GOMAXPROCS(0)\n    numThreads := numThreads(n)\n\n    // Each thread will determine next message to process by atomically\n    // incrementing curItem, process corresponding pk,msg[,aug] tuple and\n    // repeat until n is exceeded.  
The resulting accumulations will be\n    // fed into the msgsCh channel.\n    msgsCh := make(chan Pairing, numThreads)\n    valid := int32(1)\n    curItem := uint32(0)\n    mutex := sync.Mutex{}\n\n    mutex.Lock()\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            pairing := PairingCtx(useHash, dst)\n            var temp P1Affine\n            for atomic.LoadInt32(&valid) > 0 {\n                // Get a work item\n                work := atomic.AddUint32(&curItem, 1) - 1\n                if work >= uint32(n) {\n                    break\n                } else if work == 0 && maxProcs == numCores-1 &&\n                    numThreads == maxProcs {\n                    // Avoid consuming all cores by waiting until the\n                    // main thread has completed its miller loop before\n                    // proceeding.\n                    mutex.Lock()\n                    mutex.Unlock() //nolint:staticcheck\n                }\n\n                // Pull Public Key and augmentation blob\n                curPk, aug := pkFn(work, &temp)\n                if curPk == nil {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n\n                // Pairing and accumulate\n                ret := PairingAggregatePkInG1(pairing, curPk, pkValidate,\n                                              nil, false, msgs[work], aug)\n                if ret != C.BLST_SUCCESS {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n\n                // application might have some async work to do\n                runtime.Gosched()\n            }\n            if atomic.LoadInt32(&valid) > 0 {\n                PairingCommit(pairing)\n                msgsCh <- pairing\n            } else {\n                msgsCh <- nil\n            }\n        }()\n    }\n\n    // Uncompress and check signature\n    var gtsig Fp12\n    sig := sigFn()\n    if sig == nil {\n        
atomic.StoreInt32(&valid, 0)\n    }\n    if atomic.LoadInt32(&valid) > 0 && sigGroupcheck &&\n       !sig.SigValidate(false) {\n        atomic.StoreInt32(&valid, 0)\n    }\n    if atomic.LoadInt32(&valid) > 0 {\n        C.blst_aggregated_in_g2(&gtsig.cgo, &sig.cgo)\n    }\n    mutex.Unlock()\n\n    // Accumulate the thread results\n    var pairings Pairing\n    for i := 0; i < numThreads; i++ {\n        msg := <-msgsCh\n        if msg != nil {\n            if pairings == nil {\n                pairings = msg\n            } else {\n                ret := PairingMerge(pairings, msg)\n                if ret != C.BLST_SUCCESS {\n                    atomic.StoreInt32(&valid, 0)\n                }\n            }\n        }\n    }\n    if atomic.LoadInt32(&valid) == 0 || pairings == nil {\n        return false\n    }\n\n    return PairingFinalVerify(pairings, &gtsig)\n}\n\nfunc CoreVerifyPkInG1(pk *P1Affine, sig *P2Affine, hash_or_encode bool,\n        msg Message, dst []byte, optional ...[]byte) int {\n\n    var aug []byte\n    if len(optional) > 0 {\n        aug = optional[0]\n    }\n\n    if runtime.NumGoroutine() < maxProcs {\n        sigFn := func() *P2Affine {\n            return sig\n        }\n        pkFn := func(_ uint32, _ *P1Affine) (*P1Affine, []byte) {\n            return pk, aug\n        }\n        if !coreAggregateVerifyPkInG1(sigFn, true, pkFn, true, []Message{msg},\n                                      dst, hash_or_encode) {\n            return C.BLST_VERIFY_FAIL\n        }\n        return C.BLST_SUCCESS\n    }\n\n    return int(C.blst_core_verify_pk_in_g1(&pk.cgo, &sig.cgo, C.bool(hash_or_encode),\n                                           ptrOrNil(msg), C.size_t(len(msg)),\n                                           ptrOrNil(dst), C.size_t(len(dst)),\n                                           ptrOrNil(aug), C.size_t(len(aug))))\n}\n\n// pks are assumed to be verified for proof of possession,\n// which implies that they are already 
group-checked\nfunc (sig *P2Affine) FastAggregateVerify(sigGroupcheck bool,\n        pks []*P1Affine, msg Message, dst []byte,\n        optional ...interface{}) bool { // pass-through to Verify\n    n := len(pks)\n\n    // TODO: return value for length zero?\n    if n == 0 {\n        return false\n    }\n\n    aggregator := new(P1Aggregate)\n    if !aggregator.Aggregate(pks, false) {\n        return false\n    }\n    pkAff := aggregator.ToAffine()\n\n    // Verify\n    return sig.Verify(sigGroupcheck, pkAff, false, msg, dst, optional...)\n}\n\nfunc (*P2Affine) MultipleAggregateVerify(sigs []*P2Affine,\n        sigsGroupcheck bool, pks []*P1Affine, pksVerify bool,\n        msgs []Message, dst []byte, randFn func(*Scalar), randBits int,\n        optional ...interface{}) bool { // useHash\n\n    // Sanity checks and argument parsing\n    n := len(pks)\n    if n == 0 || len(msgs) != n || len(sigs) != n {\n        return false\n    }\n    _, augs, useHash, ok := parseOpts(optional...)\n    useAugs := len(augs) != 0\n    if !ok || (useAugs && len(augs) != n) {\n        return false\n    }\n\n    paramsFn :=\n        func(work uint32, _ *P2Affine, _ *P1Affine, rand *Scalar) (\n            *P2Affine, *P1Affine, *Scalar, []byte) {\n            randFn(rand)\n            var aug []byte\n            if useAugs {\n                aug = augs[work]\n            }\n            return sigs[work], pks[work], rand, aug\n        }\n\n    return multipleAggregateVerifyPkInG1(paramsFn, sigsGroupcheck, pksVerify,\n                                         msgs, dst, randBits, useHash)\n}\n\ntype mulAggGetterPkInG1 func(work uint32, sig *P2Affine, pk *P1Affine,\n    rand *Scalar) (*P2Affine, *P1Affine, *Scalar, []byte)\n\nfunc multipleAggregateVerifyPkInG1(paramsFn mulAggGetterPkInG1,\n        sigsGroupcheck bool, pksVerify bool, msgs []Message,\n        dst []byte, randBits int,\n        optional ...bool) bool { // useHash\n    n := len(msgs)\n    if n == 0 {\n        return false\n    
}\n\n    useHash := true\n    if len(optional) > 0 {\n        useHash = optional[0]\n    }\n\n    numThreads := numThreads(n)\n\n    // Each thread will determine next message to process by atomically\n    // incrementing curItem, process corresponding pk,msg[,aug] tuple and\n    // repeat until n is exceeded.  The resulting accumulations will be\n    // fed into the msgsCh channel.\n    msgsCh := make(chan Pairing, numThreads)\n    valid := int32(1)\n    curItem := uint32(0)\n\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            pairing := PairingCtx(useHash, dst)\n            var tempRand Scalar\n            var tempPk P1Affine\n            var tempSig P2Affine\n            for atomic.LoadInt32(&valid) > 0 {\n                // Get a work item\n                work := atomic.AddUint32(&curItem, 1) - 1\n                if work >= uint32(n) {\n                    break\n                }\n\n                curSig, curPk, curRand, aug := paramsFn(work, &tempSig,\n                                                        &tempPk, &tempRand)\n\n                if PairingMulNAggregatePkInG1(pairing, curPk, pksVerify,\n                                              curSig, sigsGroupcheck, curRand,\n                                              randBits, msgs[work], aug) !=\n                        C.BLST_SUCCESS {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n\n                // application might have some async work to do\n                runtime.Gosched()\n            }\n            if atomic.LoadInt32(&valid) > 0 {\n                PairingCommit(pairing)\n                msgsCh <- pairing\n            } else {\n                msgsCh <- nil\n            }\n        }()\n    }\n\n    // Accumulate the thread results\n    var pairings Pairing\n    for i := 0; i < numThreads; i++ {\n        msg := <-msgsCh\n        if msg != nil {\n            if pairings == nil {\n                pairings = msg\n 
           } else {\n                ret := PairingMerge(pairings, msg)\n                if ret != C.BLST_SUCCESS {\n                    atomic.StoreInt32(&valid, 0)\n                }\n            }\n        }\n    }\n    if atomic.LoadInt32(&valid) == 0 || pairings == nil {\n        return false\n    }\n\n    return PairingFinalVerify(pairings, nil)\n}\n\n//\n// Aggregate P2\n//\n\ntype aggGetterP2 func(i uint32, temp *P2Affine) *P2Affine\ntype P2Aggregate struct {\n    v *P2\n}\n\n// Aggregate uncompressed elements\nfunc (agg *P2Aggregate) Aggregate(elmts []*P2Affine,\n        groupcheck bool) bool {\n    if len(elmts) == 0 {\n        return true\n    }\n    getter := func(i uint32, _ *P2Affine) *P2Affine { return elmts[i] }\n    return agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P2Aggregate) AggregateWithRandomness(pointsIf interface{},\n        scalarsIf interface{}, nbits int, groupcheck bool) bool {\n    if groupcheck && !P2AffinesValidate(pointsIf) {\n        return false\n    }\n    agg.v = P2AffinesMult(pointsIf, scalarsIf, nbits)\n    return true\n}\n\n// Aggregate compressed elements\nfunc (agg *P2Aggregate) AggregateCompressed(elmts [][]byte,\n        groupcheck bool) bool {\n    if len(elmts) == 0 {\n        return true\n    }\n    getter := func(i uint32, p *P2Affine) *P2Affine {\n        bytes := elmts[i]\n        if p.Uncompress(bytes) == nil {\n            return nil\n        }\n        return p\n    }\n    return agg.coreAggregate(getter, groupcheck, len(elmts))\n}\n\nfunc (agg *P2Aggregate) AddAggregate(other *P2Aggregate) {\n    if other.v == nil {\n        // do nothing\n    } else if agg.v == nil {\n        agg.v = other.v\n    } else {\n        C.blst_p2_add_or_double(&agg.v.cgo, &agg.v.cgo, &other.v.cgo)\n    }\n}\n\nfunc (agg *P2Aggregate) Add(elmt *P2Affine, groupcheck bool) bool {\n    if groupcheck && !bool(C.blst_p2_affine_in_g2(&elmt.cgo)) {\n        return false\n    }\n    if agg.v == nil {\n        agg.v = 
new(P2)\n        C.blst_p2_from_affine(&agg.v.cgo, &elmt.cgo)\n    } else {\n        C.blst_p2_add_or_double_affine(&agg.v.cgo, &agg.v.cgo, &elmt.cgo)\n    }\n    return true\n}\n\nfunc (agg *P2Aggregate) ToAffine() *P2Affine {\n    if agg.v == nil {\n        return new(P2Affine)\n    }\n    return agg.v.ToAffine()\n}\n\nfunc (agg *P2Aggregate) coreAggregate(getter aggGetterP2, groupcheck bool,\n        n int) bool {\n\n    if n == 0 {\n        return true\n    }\n    // operations are considered short enough for not to care about\n    // keeping one core free...\n    numThreads := runtime.GOMAXPROCS(0)\n    if numThreads > n {\n        numThreads = n\n    }\n\n    valid := int32(1)\n    type result struct {\n        agg   *P2\n        empty bool\n    }\n    msgs := make(chan result, numThreads)\n    curItem := uint32(0)\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            first := true\n            var agg P2\n            var temp P2Affine\n            for atomic.LoadInt32(&valid) > 0 {\n                // Get a work item\n                work := atomic.AddUint32(&curItem, 1) - 1\n                if work >= uint32(n) {\n                    break\n                }\n\n                // Signature validate\n                curElmt := getter(work, &temp)\n                if curElmt == nil {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n                if groupcheck && !bool(C.blst_p2_affine_in_g2(&curElmt.cgo)) {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n                if first {\n                    C.blst_p2_from_affine(&agg.cgo, &curElmt.cgo)\n                    first = false\n                } else {\n                    C.blst_p2_add_or_double_affine(&agg.cgo, &agg.cgo, &curElmt.cgo)\n                }\n                // application might have some async work to do\n                runtime.Gosched()\n            }\n            if 
first {\n                msgs <- result{nil, true}\n            } else if atomic.LoadInt32(&valid) > 0 {\n                msgs <- result{&agg, false}\n            } else {\n                msgs <- result{nil, false}\n            }\n        }()\n    }\n\n    // Accumulate the thread results\n    first := agg.v == nil\n    validLocal := true\n    for i := 0; i < numThreads; i++ {\n        msg := <-msgs\n        if !validLocal || msg.empty {\n            // do nothing\n        } else if msg.agg == nil {\n            validLocal = false\n            // This should be unnecessary but seems safer\n            atomic.StoreInt32(&valid, 0)\n        } else {\n            if first {\n                agg.v = msg.agg\n                first = false\n            } else {\n                C.blst_p2_add_or_double(&agg.v.cgo, &agg.v.cgo, &msg.agg.cgo)\n            }\n        }\n    }\n    if atomic.LoadInt32(&valid) == 0 {\n        agg.v = nil\n        return false\n    }\n    return true\n}\n"
  },
  {
    "path": "bindings/go/blst_minpk_test.go",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\npackage blst\n\nimport (\n    \"crypto/rand\"\n    \"fmt\"\n    \"runtime\"\n    \"testing\"\n)\n\n// Min PK.\ntype PublicKeyMinPk = P1Affine\ntype SignatureMinPk = P2Affine\ntype AggregateSignatureMinPk = P2Aggregate\ntype AggregatePublicKeyMinPk = P1Aggregate\n\n// Names in this file must be unique to support min-sig so we can't use 'dst'\n// here.\nvar dstMinPk = []byte(\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\")\n\nfunc init() {\n    // Use all cores when testing and benchmarking\n    SetMaxProcs(runtime.GOMAXPROCS(0))\n}\n\nfunc TestInfinityMinPk(t *testing.T) {\n    t.Parallel()\n    var infComp [BLST_P1_COMPRESS_BYTES]byte\n    infComp[0] |= 0xc0\n    new(PublicKeyMinPk).Uncompress(infComp[:])\n}\n\nfunc TestSerdesMinPk(t *testing.T) {\n    t.Parallel()\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n\n    // Serialize/deserialize sk\n    sk2 := new(SecretKey).Deserialize(sk.Serialize())\n    defer sk2.Zeroize()\n    if !sk.Equals(sk2) {\n        t.Error(\"sk2 != sk\")\n    }\n\n    // Negative test equals\n    sk.cgo.b[0]++\n    if sk.Equals(sk2) {\n        t.Error(\"sk2 == sk\")\n    }\n\n    // pk\n    pk := new(PublicKeyMinPk).From(sk)\n\n    // Compress/decompress sk\n    pk2 := new(PublicKeyMinPk).Uncompress(pk.Compress())\n    if !pk.Equals(pk2) {\n        t.Error(\"pk2 != pk\")\n    }\n\n    // Serialize/deserialize sk\n    pk3 := new(PublicKeyMinPk).Deserialize(pk.Serialize())\n    if !pk.Equals(pk3) {\n        t.Error(\"pk3 != pk\")\n    }\n\n    // Negative test equals\n    // pk.x.l[0] = pk.x.l[0] + 1\n    // if 
pk.Equals(pk2) {\n    //  t.Error(\"pk2 == pk\")\n    // }\n}\n\nfunc TestSignVerifyMinPk(t *testing.T) {\n    t.Parallel()\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk0 := KeyGen(ikm[:])\n    ikm[0]++\n    sk1 := KeyGen(ikm[:])\n\n    // pk\n    pk0 := new(PublicKeyMinPk).From(sk0)\n    pk1 := new(PublicKeyMinPk).From(sk1)\n\n    // Sign\n    msg0 := []byte(\"hello foo\")\n    msg1 := []byte(\"hello bar!\")\n    sig0 := new(SignatureMinPk).Sign(sk0, msg0, dstMinPk)\n    sig1 := new(SignatureMinPk).Sign(sk1, msg1, dstMinPk)\n\n    // Verify\n    if !sig0.Verify(true, pk0, false, msg0, dstMinPk) {\n        t.Error(\"verify sig0\")\n    }\n    if !sig1.Verify(true, pk1, false, msg1, dstMinPk) {\n        t.Error(\"verify sig1\")\n    }\n    if !new(SignatureMinPk).VerifyCompressed(sig1.Compress(), true,\n                                             pk1.Compress(), false,\n                                             msg1, dstMinPk) {\n        t.Error(\"verify sig1\")\n    }\n    // Batch verify\n    if !sig0.AggregateVerify(true, []*PublicKeyMinPk{pk0}, false,\n                             []Message{msg0}, dstMinPk) {\n        t.Error(\"aggregate verify sig0\")\n    }\n    // Verify compressed inputs\n    if !new(SignatureMinPk).AggregateVerifyCompressed(sig0.Compress(), true,\n                                                      [][]byte{pk0.Compress()},\n                                                      false,\n                                                      []Message{msg0}, dstMinPk) {\n        t.Error(\"aggregate verify sig0 compressed\")\n    }\n\n    // Verify serialized inputs\n    if !new(SignatureMinPk).AggregateVerifyCompressed(sig0.Compress(), true,\n                                                      [][]byte{pk0.Serialize()},\n 
                                                     false,\n                                                      []Message{msg0}, dstMinPk) {\n        t.Error(\"aggregate verify sig0 serialized\")\n    }\n\n    // Compressed with empty pk\n    var emptyPk []byte\n    if new(SignatureMinPk).VerifyCompressed(sig0.Compress(), true,\n                                            emptyPk, false, msg0, dstMinPk) {\n        t.Error(\"verify sig compressed inputs\")\n    }\n    // Wrong message\n    if sig0.Verify(true, pk0, false, msg1, dstMinPk) {\n        t.Error(\"Expected Verify to return false\")\n    }\n    // Wrong key\n    if sig0.Verify(true, pk1, false, msg0, dstMinPk) {\n        t.Error(\"Expected Verify to return false\")\n    }\n    // Wrong sig\n    if sig1.Verify(true, pk0, false, msg0, dstMinPk) {\n        t.Error(\"Expected Verify to return false\")\n    }\n}\n\nfunc TestSignVerifyAugMinPk(t *testing.T) {\n    t.Parallel()\n    sk := genRandomKeyMinPk()\n    pk := new(PublicKeyMinPk).From(sk)\n    msg := []byte(\"hello foo\")\n    aug := []byte(\"augmentation\")\n    sig := new(SignatureMinPk).Sign(sk, msg, dstMinPk, aug)\n    if !sig.Verify(true, pk, false, msg, dstMinPk, aug) {\n        t.Error(\"verify sig\")\n    }\n    aug2 := []byte(\"augmentation2\")\n    if sig.Verify(true, pk, false, msg, dstMinPk, aug2) {\n        t.Error(\"verify sig, wrong augmentation\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinPk) {\n        t.Error(\"verify sig, no augmentation\")\n    }\n    // TODO: augmentation with aggregate verify\n}\n\nfunc TestSignVerifyEncodeMinPk(t *testing.T) {\n    t.Parallel()\n    sk := genRandomKeyMinPk()\n    pk := new(PublicKeyMinPk).From(sk)\n    msg := []byte(\"hello foo\")\n    sig := new(SignatureMinPk).Sign(sk, msg, dstMinPk, false)\n    if !sig.Verify(true, pk, false, msg, dstMinPk, false) {\n        t.Error(\"verify sig\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinPk) {\n        t.Error(\"verify sig expected 
fail, wrong hashing engine\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinPk, 0) {\n        t.Error(\"verify sig expected fail, illegal argument\")\n    }\n}\n\nfunc TestSignVerifyAggregateMinPk(t *testing.T) {\n    t.Parallel()\n    for size := 1; size < 20; size++ {\n        sks, msgs, _, pubks, _, err :=\n            generateBatchTestDataUncompressedMinPk(size)\n        if err {\n            t.Error(\"Error generating test data\")\n            return\n        }\n\n        // All signers sign the same message\n        sigs := make([]*SignatureMinPk, 0)\n        for i := 0; i < size; i++ {\n            sigs = append(sigs, new(SignatureMinPk).Sign(sks[i], msgs[0],\n                dstMinPk))\n        }\n        agProj := new(AggregateSignatureMinPk)\n        if !agProj.Aggregate(sigs, false) {\n            t.Error(\"Aggregate unexpectedly returned nil\")\n            return\n        }\n        agSig := agProj.ToAffine()\n\n        if !agSig.FastAggregateVerify(false, pubks, msgs[0], dstMinPk) {\n            t.Errorf(\"failed to verify size %d\", size)\n        }\n\n        // Negative test\n        if agSig.FastAggregateVerify(false, pubks, msgs[0][1:], dstMinPk) {\n            t.Errorf(\"failed to not verify size %d\", size)\n        }\n\n        // Test compressed signature aggregation\n        compSigs := make([][]byte, size)\n        for i := 0; i < size; i++ {\n            compSigs[i] = sigs[i].Compress()\n        }\n        agProj = new(AggregateSignatureMinPk)\n        if !agProj.AggregateCompressed(compSigs, false) {\n            t.Error(\"AggregateCompressed unexpectedly returned nil\")\n            return\n        }\n        agSig = agProj.ToAffine()\n        if !agSig.FastAggregateVerify(false, pubks, msgs[0], dstMinPk) {\n            t.Errorf(\"failed to verify size %d\", size)\n        }\n\n        // Negative test\n        if agSig.FastAggregateVerify(false, pubks, msgs[0][1:], dstMinPk) {\n            t.Errorf(\"failed to not verify size 
%d\", size)\n        }\n    }\n}\n\nfunc TestSignMultipleVerifyAggregateMinPk(t *testing.T) {\n    t.Parallel()\n    msgCount := 5\n    for size := 1; size < 20; size++ {\n        msgs := make([]Message, 0)\n        sks := make([]*SecretKey, 0)\n        pks := make([]*PublicKeyMinPk, 0)\n\n        // Generate messages\n        for i := 0; i < msgCount; i++ {\n            msg := Message(fmt.Sprintf(\"blst is a blast!! %d %d\", i, size))\n            msgs = append(msgs, msg)\n        }\n\n        // Generate keypairs\n        for i := 0; i < size; i++ {\n            priv := genRandomKeyMinPk()\n            sks = append(sks, priv)\n            pks = append(pks, new(PublicKeyMinPk).From(priv))\n        }\n\n        // All signers sign each message\n        aggSigs := make([]*SignatureMinPk, 0)\n        aggPks := make([]*PublicKeyMinPk, 0)\n        for i := 0; i < msgCount; i++ {\n            sigsToAgg := make([]*SignatureMinPk, 0)\n            pksToAgg := make([]*PublicKeyMinPk, 0)\n            for j := 0; j < size; j++ {\n                sigsToAgg = append(sigsToAgg,\n                                   new(SignatureMinPk).Sign(sks[j], msgs[i],\n                                                            dstMinPk))\n                pksToAgg = append(pksToAgg, pks[j])\n            }\n\n            agSig := new(AggregateSignatureMinPk)\n            if !agSig.Aggregate(sigsToAgg, true) {\n                t.Error(\"failed to aggregate\")\n            }\n            afSig := agSig.ToAffine()\n            agPk := new(AggregatePublicKeyMinPk)\n            agPk.Aggregate(pksToAgg, false)\n            afPk := agPk.ToAffine()\n            aggSigs = append(aggSigs, afSig)\n            aggPks = append(aggPks, afPk)\n\n            // Verify aggregated signature and pk\n            if !afSig.Verify(false, afPk, false, msgs[i], dstMinPk) {\n                t.Errorf(\"failed to verify single aggregate size %d\", size)\n            }\n\n        }\n\n        randFn := func(s *Scalar) 
{\n            var rbytes [BLST_SCALAR_BYTES]byte\n            _, err := rand.Read(rbytes[:])\n            if err != nil {\n                t.Error(err.Error())\n            }\n            s.FromBEndian(rbytes[:])\n        }\n\n        // Verify\n        randBits := 64\n        if !new(SignatureMinPk).MultipleAggregateVerify(aggSigs, true,\n                                                        aggPks, false,\n                                                        msgs, dstMinPk,\n                                                        randFn, randBits) {\n            t.Errorf(\"failed to verify multiple aggregate size %d\", size)\n        }\n\n        // Negative test\n        if new(SignatureMinPk).MultipleAggregateVerify(aggSigs, true,\n                                                       aggPks, false,\n                                                       msgs, dstMinPk[1:],\n                                                       randFn, randBits) {\n            t.Errorf(\"failed to not verify multiple aggregate size %d\", size)\n        }\n    }\n}\n\nfunc TestBatchUncompressMinPk(t *testing.T) {\n    t.Parallel()\n    size := 128\n    var points []*P2Affine\n    var compPoints [][]byte\n\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! 
%d\", i))\n        p2 := HashToG2(msg, dstMinPk).ToAffine()\n        points = append(points, p2)\n        compPoints = append(compPoints, p2.Compress())\n    }\n    uncompPoints := new(SignatureMinPk).BatchUncompress(compPoints)\n    if uncompPoints == nil {\n        t.Errorf(\"BatchUncompress returned nil size %d\", size)\n    }\n    for i := 0; i < size; i++ {\n        if !points[i].Equals(uncompPoints[i]) {\n            t.Errorf(\"Uncompressed point does not equal initial point %d\", i)\n        }\n    }\n}\n\nfunc BenchmarkCoreSignMinPk(b *testing.B) {\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n    msg := []byte(\"hello foo\")\n    for i := 0; i < b.N; i++ {\n        new(SignatureMinPk).Sign(sk, msg, dstMinPk)\n    }\n}\n\nfunc BenchmarkCoreVerifyMinPk(b *testing.B) {\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n    pk := new(PublicKeyMinPk).From(sk)\n    msg := []byte(\"hello foo\")\n    sig := new(SignatureMinPk).Sign(sk, msg, dstMinPk)\n\n    // Verify\n    for i := 0; i < b.N; i++ {\n        if !sig.Verify(true, pk, false, msg, dstMinPk) {\n            b.Fatal(\"verify sig\")\n        }\n    }\n}\n\nfunc BenchmarkCoreVerifyAggregateMinPk(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            msgs, _, pubks, agsig, err := generateBatchTestDataMinPk(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n            
b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                if !new(SignatureMinPk).AggregateVerifyCompressed(agsig, true,\n                                                                  pubks, false,\n                                                                  msgs, dstMinPk) {\n                    b.Fatal(\"failed to verify\")\n                }\n            }\n        }\n    }\n\n    b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc BenchmarkVerifyAggregateUncompressedMinPk(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            _, msgs, _, pubks, agsig, err :=\n                generateBatchTestDataUncompressedMinPk(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n            b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                if !agsig.AggregateVerify(true, pubks, false, msgs, dstMinPk) {\n                    b.Fatal(\"failed to verify\")\n                }\n            }\n        }\n    }\n\n    b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc BenchmarkCoreAggregateMinPk(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            _, sigs, _, _, err := generateBatchTestDataMinPk(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n            b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                var agg AggregateSignatureMinPk\n                agg.AggregateCompressed(sigs, true)\n            }\n        }\n    }\n\n    
b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc genRandomKeyMinPk() *SecretKey {\n    // Generate 32 bytes of randomness\n    var ikm [32]byte\n    _, err := rand.Read(ikm[:])\n\n    if err != nil {\n        return nil\n    }\n    return KeyGen(ikm[:])\n}\n\nfunc generateBatchTestDataMinPk(size int) (msgs []Message,\n    sigs [][]byte, pubks [][]byte, agsig []byte, err bool) {\n    err = false\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! %d\", i))\n        msgs = append(msgs, msg)\n        priv := genRandomKeyMinPk()\n        sigs = append(sigs, new(SignatureMinPk).Sign(priv, msg, dstMinPk).\n            Compress())\n        pubks = append(pubks, new(PublicKeyMinPk).From(priv).Compress())\n    }\n    agProj := new(AggregateSignatureMinPk)\n    if !agProj.AggregateCompressed(sigs, true) {\n        fmt.Println(\"AggregateCompressed unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agAff := agProj.ToAffine()\n    if agAff == nil {\n        fmt.Println(\"ToAffine unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agsig = agAff.Compress()\n    return //nolint:revive\n}\n\nfunc generateBatchTestDataUncompressedMinPk(size int) (sks []*SecretKey,\n    msgs []Message, sigs []*SignatureMinPk, //nolint:unparam\n    pubks []*PublicKeyMinPk, agsig *SignatureMinPk, err bool) {\n    err = false\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! 
%d\", i))\n        msgs = append(msgs, msg)\n        priv := genRandomKeyMinPk()\n        sks = append(sks, priv)\n        sigs = append(sigs, new(SignatureMinPk).Sign(priv, msg, dstMinPk))\n        pubks = append(pubks, new(PublicKeyMinPk).From(priv))\n    }\n    agProj := new(AggregateSignatureMinPk)\n    if !agProj.Aggregate(sigs, true) {\n        fmt.Println(\"Aggregate unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agsig = agProj.ToAffine()\n    return //nolint:revive\n}\n\nfunc BenchmarkBatchUncompressMinPk(b *testing.B) {\n    size := 128\n    var compPoints [][]byte\n\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! %d\", i))\n        p2 := HashToG2(msg, dstMinPk).ToAffine()\n        compPoints = append(compPoints, p2.Compress())\n    }\n    b.Run(\"Single\", func(b *testing.B) {\n        b.ResetTimer()\n        b.ReportAllocs()\n        var tmp SignatureMinPk\n        for i := 0; i < b.N; i++ {\n            for j := 0; j < size; j++ {\n                if tmp.Uncompress(compPoints[j]) == nil {\n                    b.Fatal(\"could not uncompress point\")\n                }\n            }\n        }\n    })\n    b.Run(\"Batch\", func(b *testing.B) {\n        b.ResetTimer()\n        b.ReportAllocs()\n        var tmp SignatureMinPk\n        for i := 0; i < b.N; i++ {\n            if tmp.BatchUncompress(compPoints) == nil {\n                b.Fatal(\"could not batch uncompress points\")\n            }\n        }\n    })\n}\n\nfunc TestSignVerifyAggregateValidatesInfinitePubkeyMinPk(t *testing.T) {\n    t.Parallel()\n    size := 20\n    sks, msgs, _, pubks, _, err :=\n      generateBatchTestDataUncompressedMinPk(size)\n    if err {\n        t.Error(\"Error generating test data\")\n        return\n    }\n\n    // All signers sign the same message\n    sigs := make([]*SignatureMinPk, size)\n    for i := range sigs {\n        sigs[i] = new(SignatureMinPk).Sign(sks[i], msgs[i], 
dstMinPk)\n    }\n\n    // Single message: Infinite pubkeys and signature\n    zeroKey := new(PublicKeyMinPk)\n    zeroSig := new(SignatureMinPk)\n    agProj := new(AggregateSignatureMinPk)\n    if !agProj.Aggregate([]*SignatureMinPk{zeroSig}, false) {\n        t.Error(\"Aggregate unexpectedly returned nil\")\n        return\n    }\n    agSig := agProj.ToAffine()\n\n    if agSig.AggregateVerify(false, []*PublicKeyMinPk{zeroKey}, false,\n                             [][]byte{msgs[0]}, dstMinPk) {\n        t.Error(\"failed to NOT verify signature\")\n    }\n\n    // Replace firstkey with infinite pubkey.\n    pubks[0] = zeroKey\n    sigs[0] = zeroSig\n    agProj = new(AggregateSignatureMinPk)\n    if !agProj.Aggregate(sigs, false) {\n        t.Error(\"Aggregate unexpectedly returned nil\")\n        return\n    }\n    agSig = agProj.ToAffine()\n\n    if agSig.AggregateVerify(false, pubks, false, msgs, dstMinPk) {\n        t.Error(\"failed to NOT verify signature\")\n    }\n}\n\nfunc TestEmptyMessageMinPk(t *testing.T) {\n    t.Parallel()\n    msg := []byte(\"\")\n    var sk_bytes = []byte {99, 64, 58, 175, 15, 139, 113, 184, 37, 222, 127,\n        204, 233, 209, 34, 8, 61, 27, 85, 251, 68, 31, 255, 214, 8, 189, 190, 71,\n        198, 16, 210, 91};\n    sk := new(SecretKey).Deserialize(sk_bytes)\n    pk := new(PublicKeyMinPk).From(sk)\n    sig := new(SignatureMinPk).Sign(sk, msg, dstMinPk)\n    if !new(SignatureMinPk).VerifyCompressed(sig.Compress(), true,\n        pk.Compress(), false, msg, dstMinPk) {\n        t.Error(\"failed to verify empty message\")\n    }\n}\n\nfunc TestEmptySignatureMinPk(t *testing.T) {\n    t.Parallel()\n    msg := []byte(\"message\")\n    var sk_bytes = []byte {99, 64, 58, 175, 15, 139, 113, 184, 37, 222, 127,\n        204, 233, 209, 34, 8, 61, 27, 85, 251, 68, 31, 255, 214, 8, 189, 190, 71,\n        198, 16, 210, 91};\n    sk := new(SecretKey).Deserialize(sk_bytes)\n    pk := new(PublicKeyMinPk).From(sk)\n    var emptySig []byte\n    if 
new(SignatureMinPk).VerifyCompressed(emptySig, true, pk.Compress(), false, msg, dstMinPk) {\n        t.Error(\"failed to NOT verify empty signature\")\n    }\n}\n\nfunc TestMultiScalarP1(t *testing.T) {\n    t.Parallel()\n    const npoints = 1027\n    scalars := make([]byte, npoints*16)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        t.Error(err.Error())\n\treturn\n    }\n    points := make([]P1, npoints)\n    refs   := make([]P1, npoints)\n    generator := P1Generator()\n    for i := range points {\n        points[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n        refs[i]   = *points[i].Mult(scalars[i*16:(i+1)*16], 128)\n        if i < 27 {\n            ref := P1s(refs[:i+1]).Add()\n            ret := P1s(points[:i+1]).Mult(scalars, 128)\n            if !ref.Equals(ret) {\n                t.Error(\"failed self-consistency multi-scalar test\")\n            }\n        }\n    }\n    ref := P1s(refs).Add()\n    ret := P1s(points).Mult(scalars, 128)\n    if !ref.Equals(ret) {\n        t.Error(\"failed self-consistency multi-scalar test\")\n    }\n}\n\nfunc BenchmarkMultiScalarP1(b *testing.B) {\n    const npoints = 200000\n    scalars := make([]byte, npoints*32)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        b.Fatal(err.Error())\n    }\n    temp := make([]P1, npoints)\n    generator := P1Generator()\n    for i := range temp {\n        temp[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n    }\n    points := P1s(temp).ToAffine()\n    run := func(points []P1Affine) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            for i:=0; i<b.N; i++ {\n                P1Affines(points).Mult(scalars, 255)\n            }\n        }\n    }\n    b.Run(fmt.Sprintf(\"%d\",npoints/8), run(points[:npoints/8]))\n    b.Run(fmt.Sprintf(\"%d\",npoints/4), run(points[:npoints/4]))\n    b.Run(fmt.Sprintf(\"%d\",npoints/2), run(points[:npoints/2]))\n    b.Run(fmt.Sprintf(\"%d\",npoints), run(points))\n}\n\nfunc 
BenchmarkToP1Affines(b *testing.B) {\n    const npoints = 32000\n    scalars := make([]byte, npoints*32)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        b.Fatal(err.Error())\n    }\n    temp := make([]P1, npoints)\n    generator := P1Generator()\n    for i := range temp {\n        temp[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n    }\n    scratch := make([]P1Affine, npoints)\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            b.ResetTimer()\n            for i:=0; i<b.N; i++ {\n                P1s(temp[:size]).ToAffine(scratch)\n            }\n        }\n    }\n    b.Run(fmt.Sprintf(\"%d\",npoints/128), run(npoints/128))\n    b.Run(fmt.Sprintf(\"%d\",npoints/64), run(npoints/64))\n    b.Run(fmt.Sprintf(\"%d\",npoints/32), run(npoints/32))\n    b.Run(fmt.Sprintf(\"%d\",npoints/16), run(npoints/16))\n    b.Run(fmt.Sprintf(\"%d\",npoints/4), run(npoints/4))\n    b.Run(fmt.Sprintf(\"%d\",npoints), run(npoints))\n}\n"
  },
  {
    "path": "bindings/go/blst_minsig_test.go",
    "content": "// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n// DO NOT EDIT THIS FILE!!\n// The file is generated from blst_minpk_test.go by generate.py\n// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\npackage blst\n\nimport (\n    \"crypto/rand\"\n    \"fmt\"\n    \"runtime\"\n    \"testing\"\n)\n\n// Min PK.\ntype PublicKeyMinSig = P2Affine\ntype SignatureMinSig = P1Affine\ntype AggregateSignatureMinSig = P1Aggregate\ntype AggregatePublicKeyMinSig = P2Aggregate\n\n// Names in this file must be unique to support min-sig so we can't use 'dst'\n// here.\nvar dstMinSig = []byte(\"BLS_SIG_BLS12381G1_XMD:SHA-256_SSWU_RO_NUL_\")\n\nfunc init() {\n    // Use all cores when testing and benchmarking\n    SetMaxProcs(runtime.GOMAXPROCS(0))\n}\n\nfunc TestInfinityMinSig(t *testing.T) {\n    t.Parallel()\n    var infComp [BLST_P2_COMPRESS_BYTES]byte\n    infComp[0] |= 0xc0\n    new(PublicKeyMinSig).Uncompress(infComp[:])\n}\n\nfunc TestSerdesMinSig(t *testing.T) {\n    t.Parallel()\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n\n    // Serialize/deserialize sk\n    sk2 := new(SecretKey).Deserialize(sk.Serialize())\n    defer sk2.Zeroize()\n    if !sk.Equals(sk2) {\n        t.Error(\"sk2 != sk\")\n    }\n\n    // Negative test equals\n    sk.cgo.b[0]++\n    if sk.Equals(sk2) {\n        t.Error(\"sk2 == sk\")\n    }\n\n    // pk\n    pk := new(PublicKeyMinSig).From(sk)\n\n    // Compress/decompress sk\n    pk2 := new(PublicKeyMinSig).Uncompress(pk.Compress())\n    if !pk.Equals(pk2) {\n        t.Error(\"pk2 != pk\")\n    }\n\n    // 
Serialize/deserialize sk\n    pk3 := new(PublicKeyMinSig).Deserialize(pk.Serialize())\n    if !pk.Equals(pk3) {\n        t.Error(\"pk3 != pk\")\n    }\n\n    // Negative test equals\n    // pk.x.l[0] = pk.x.l[0] + 1\n    // if pk.Equals(pk2) {\n    //  t.Error(\"pk2 == pk\")\n    // }\n}\n\nfunc TestSignVerifyMinSig(t *testing.T) {\n    t.Parallel()\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk0 := KeyGen(ikm[:])\n    ikm[0]++\n    sk1 := KeyGen(ikm[:])\n\n    // pk\n    pk0 := new(PublicKeyMinSig).From(sk0)\n    pk1 := new(PublicKeyMinSig).From(sk1)\n\n    // Sign\n    msg0 := []byte(\"hello foo\")\n    msg2 := []byte(\"hello bar!\")\n    sig0 := new(SignatureMinSig).Sign(sk0, msg0, dstMinSig)\n    sig2 := new(SignatureMinSig).Sign(sk1, msg2, dstMinSig)\n\n    // Verify\n    if !sig0.Verify(true, pk0, false, msg0, dstMinSig) {\n        t.Error(\"verify sig0\")\n    }\n    if !sig2.Verify(true, pk1, false, msg2, dstMinSig) {\n        t.Error(\"verify sig2\")\n    }\n    if !new(SignatureMinSig).VerifyCompressed(sig2.Compress(), true,\n                                             pk1.Compress(), false,\n                                             msg2, dstMinSig) {\n        t.Error(\"verify sig2\")\n    }\n    // Batch verify\n    if !sig0.AggregateVerify(true, []*PublicKeyMinSig{pk0}, false,\n                             []Message{msg0}, dstMinSig) {\n        t.Error(\"aggregate verify sig0\")\n    }\n    // Verify compressed inputs\n    if !new(SignatureMinSig).AggregateVerifyCompressed(sig0.Compress(), true,\n                                                      [][]byte{pk0.Compress()},\n                                                      false,\n                                                      []Message{msg0}, dstMinSig) {\n        
t.Error(\"aggregate verify sig0 compressed\")\n    }\n\n    // Verify serialized inputs\n    if !new(SignatureMinSig).AggregateVerifyCompressed(sig0.Compress(), true,\n                                                      [][]byte{pk0.Serialize()},\n                                                      false,\n                                                      []Message{msg0}, dstMinSig) {\n        t.Error(\"aggregate verify sig0 serialized\")\n    }\n\n    // Compressed with empty pk\n    var emptyPk []byte\n    if new(SignatureMinSig).VerifyCompressed(sig0.Compress(), true,\n                                            emptyPk, false, msg0, dstMinSig) {\n        t.Error(\"verify sig compressed inputs\")\n    }\n    // Wrong message\n    if sig0.Verify(true, pk0, false, msg2, dstMinSig) {\n        t.Error(\"Expected Verify to return false\")\n    }\n    // Wrong key\n    if sig0.Verify(true, pk1, false, msg0, dstMinSig) {\n        t.Error(\"Expected Verify to return false\")\n    }\n    // Wrong sig\n    if sig2.Verify(true, pk0, false, msg0, dstMinSig) {\n        t.Error(\"Expected Verify to return false\")\n    }\n}\n\nfunc TestSignVerifyAugMinSig(t *testing.T) {\n    t.Parallel()\n    sk := genRandomKeyMinSig()\n    pk := new(PublicKeyMinSig).From(sk)\n    msg := []byte(\"hello foo\")\n    aug := []byte(\"augmentation\")\n    sig := new(SignatureMinSig).Sign(sk, msg, dstMinSig, aug)\n    if !sig.Verify(true, pk, false, msg, dstMinSig, aug) {\n        t.Error(\"verify sig\")\n    }\n    aug1 := []byte(\"augmentation2\")\n    if sig.Verify(true, pk, false, msg, dstMinSig, aug1) {\n        t.Error(\"verify sig, wrong augmentation\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinSig) {\n        t.Error(\"verify sig, no augmentation\")\n    }\n    // TODO: augmentation with aggregate verify\n}\n\nfunc TestSignVerifyEncodeMinSig(t *testing.T) {\n    t.Parallel()\n    sk := genRandomKeyMinSig()\n    pk := new(PublicKeyMinSig).From(sk)\n    msg := 
[]byte(\"hello foo\")\n    sig := new(SignatureMinSig).Sign(sk, msg, dstMinSig, false)\n    if !sig.Verify(true, pk, false, msg, dstMinSig, false) {\n        t.Error(\"verify sig\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinSig) {\n        t.Error(\"verify sig expected fail, wrong hashing engine\")\n    }\n    if sig.Verify(true, pk, false, msg, dstMinSig, 0) {\n        t.Error(\"verify sig expected fail, illegal argument\")\n    }\n}\n\nfunc TestSignVerifyAggregateMinSig(t *testing.T) {\n    t.Parallel()\n    for size := 1; size < 20; size++ {\n        sks, msgs, _, pubks, _, err :=\n            generateBatchTestDataUncompressedMinSig(size)\n        if err {\n            t.Error(\"Error generating test data\")\n            return\n        }\n\n        // All signers sign the same message\n        sigs := make([]*SignatureMinSig, 0)\n        for i := 0; i < size; i++ {\n            sigs = append(sigs, new(SignatureMinSig).Sign(sks[i], msgs[0],\n                dstMinSig))\n        }\n        agProj := new(AggregateSignatureMinSig)\n        if !agProj.Aggregate(sigs, false) {\n            t.Error(\"Aggregate unexpectedly returned nil\")\n            return\n        }\n        agSig := agProj.ToAffine()\n\n        if !agSig.FastAggregateVerify(false, pubks, msgs[0], dstMinSig) {\n            t.Errorf(\"failed to verify size %d\", size)\n        }\n\n        // Negative test\n        if agSig.FastAggregateVerify(false, pubks, msgs[0][1:], dstMinSig) {\n            t.Errorf(\"failed to not verify size %d\", size)\n        }\n\n        // Test compressed signature aggregation\n        compSigs := make([][]byte, size)\n        for i := 0; i < size; i++ {\n            compSigs[i] = sigs[i].Compress()\n        }\n        agProj = new(AggregateSignatureMinSig)\n        if !agProj.AggregateCompressed(compSigs, false) {\n            t.Error(\"AggregateCompressed unexpectedly returned nil\")\n            return\n        }\n        agSig = agProj.ToAffine()\n        
if !agSig.FastAggregateVerify(false, pubks, msgs[0], dstMinSig) {\n            t.Errorf(\"failed to verify size %d\", size)\n        }\n\n        // Negative test\n        if agSig.FastAggregateVerify(false, pubks, msgs[0][1:], dstMinSig) {\n            t.Errorf(\"failed to not verify size %d\", size)\n        }\n    }\n}\n\nfunc TestSignMultipleVerifyAggregateMinSig(t *testing.T) {\n    t.Parallel()\n    msgCount := 5\n    for size := 1; size < 20; size++ {\n        msgs := make([]Message, 0)\n        sks := make([]*SecretKey, 0)\n        pks := make([]*PublicKeyMinSig, 0)\n\n        // Generate messages\n        for i := 0; i < msgCount; i++ {\n            msg := Message(fmt.Sprintf(\"blst is a blast!! %d %d\", i, size))\n            msgs = append(msgs, msg)\n        }\n\n        // Generate keypairs\n        for i := 0; i < size; i++ {\n            priv := genRandomKeyMinSig()\n            sks = append(sks, priv)\n            pks = append(pks, new(PublicKeyMinSig).From(priv))\n        }\n\n        // All signers sign each message\n        aggSigs := make([]*SignatureMinSig, 0)\n        aggPks := make([]*PublicKeyMinSig, 0)\n        for i := 0; i < msgCount; i++ {\n            sigsToAgg := make([]*SignatureMinSig, 0)\n            pksToAgg := make([]*PublicKeyMinSig, 0)\n            for j := 0; j < size; j++ {\n                sigsToAgg = append(sigsToAgg,\n                                   new(SignatureMinSig).Sign(sks[j], msgs[i],\n                                                            dstMinSig))\n                pksToAgg = append(pksToAgg, pks[j])\n            }\n\n            agSig := new(AggregateSignatureMinSig)\n            if !agSig.Aggregate(sigsToAgg, true) {\n                t.Error(\"failed to aggregate\")\n            }\n            afSig := agSig.ToAffine()\n            agPk := new(AggregatePublicKeyMinSig)\n            agPk.Aggregate(pksToAgg, false)\n            afPk := agPk.ToAffine()\n            aggSigs = append(aggSigs, afSig)\n          
  aggPks = append(aggPks, afPk)\n\n            // Verify aggregated signature and pk\n            if !afSig.Verify(false, afPk, false, msgs[i], dstMinSig) {\n                t.Errorf(\"failed to verify single aggregate size %d\", size)\n            }\n\n        }\n\n        randFn := func(s *Scalar) {\n            var rbytes [BLST_SCALAR_BYTES]byte\n            _, err := rand.Read(rbytes[:])\n            if err != nil {\n                t.Error(err.Error())\n            }\n            s.FromBEndian(rbytes[:])\n        }\n\n        // Verify\n        randBits := 64\n        if !new(SignatureMinSig).MultipleAggregateVerify(aggSigs, true,\n                                                        aggPks, false,\n                                                        msgs, dstMinSig,\n                                                        randFn, randBits) {\n            t.Errorf(\"failed to verify multiple aggregate size %d\", size)\n        }\n\n        // Negative test\n        if new(SignatureMinSig).MultipleAggregateVerify(aggSigs, true,\n                                                       aggPks, false,\n                                                       msgs, dstMinSig[1:],\n                                                       randFn, randBits) {\n            t.Errorf(\"failed to not verify multiple aggregate size %d\", size)\n        }\n    }\n}\n\nfunc TestBatchUncompressMinSig(t *testing.T) {\n    t.Parallel()\n    size := 128\n    var points []*P1Affine\n    var compPoints [][]byte\n\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! 
%d\", i))\n        p1 := HashToG1(msg, dstMinSig).ToAffine()\n        points = append(points, p1)\n        compPoints = append(compPoints, p1.Compress())\n    }\n    uncompPoints := new(SignatureMinSig).BatchUncompress(compPoints)\n    if uncompPoints == nil {\n        t.Errorf(\"BatchUncompress returned nil size %d\", size)\n    }\n    for i := 0; i < size; i++ {\n        if !points[i].Equals(uncompPoints[i]) {\n            t.Errorf(\"Uncompressed point does not equal initial point %d\", i)\n        }\n    }\n}\n\nfunc BenchmarkCoreSignMinSig(b *testing.B) {\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n    msg := []byte(\"hello foo\")\n    for i := 0; i < b.N; i++ {\n        new(SignatureMinSig).Sign(sk, msg, dstMinSig)\n    }\n}\n\nfunc BenchmarkCoreVerifyMinSig(b *testing.B) {\n    var ikm = [...]byte{\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a,\n        0x08, 0x3a, 0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91,\n        0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99}\n\n    sk := KeyGen(ikm[:])\n    defer sk.Zeroize()\n    pk := new(PublicKeyMinSig).From(sk)\n    msg := []byte(\"hello foo\")\n    sig := new(SignatureMinSig).Sign(sk, msg, dstMinSig)\n\n    // Verify\n    for i := 0; i < b.N; i++ {\n        if !sig.Verify(true, pk, false, msg, dstMinSig) {\n            b.Fatal(\"verify sig\")\n        }\n    }\n}\n\nfunc BenchmarkCoreVerifyAggregateMinSig(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            msgs, _, pubks, agsig, err := generateBatchTestDataMinSig(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n  
          b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                if !new(SignatureMinSig).AggregateVerifyCompressed(agsig, true,\n                                                                  pubks, false,\n                                                                  msgs, dstMinSig) {\n                    b.Fatal(\"failed to verify\")\n                }\n            }\n        }\n    }\n\n    b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc BenchmarkVerifyAggregateUncompressedMinSig(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            _, msgs, _, pubks, agsig, err :=\n                generateBatchTestDataUncompressedMinSig(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n            b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                if !agsig.AggregateVerify(true, pubks, false, msgs, dstMinSig) {\n                    b.Fatal(\"failed to verify\")\n                }\n            }\n        }\n    }\n\n    b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc BenchmarkCoreAggregateMinSig(b *testing.B) {\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            _, sigs, _, _, err := generateBatchTestDataMinSig(size)\n            if err {\n                b.Fatal(\"Error generating test data\")\n            }\n            b.ResetTimer()\n            for i := 0; i < b.N; i++ {\n                var agg AggregateSignatureMinSig\n                agg.AggregateCompressed(sigs, true)\n            }\n        }\n    
}\n\n    b.Run(\"1\", run(1))\n    b.Run(\"10\", run(10))\n    b.Run(\"50\", run(50))\n    b.Run(\"100\", run(100))\n    b.Run(\"300\", run(300))\n    b.Run(\"1000\", run(1000))\n    b.Run(\"4000\", run(4000))\n}\n\nfunc genRandomKeyMinSig() *SecretKey {\n    // Generate 32 bytes of randomness\n    var ikm [32]byte\n    _, err := rand.Read(ikm[:])\n\n    if err != nil {\n        return nil\n    }\n    return KeyGen(ikm[:])\n}\n\nfunc generateBatchTestDataMinSig(size int) (msgs []Message,\n    sigs [][]byte, pubks [][]byte, agsig []byte, err bool) {\n    err = false\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! %d\", i))\n        msgs = append(msgs, msg)\n        priv := genRandomKeyMinSig()\n        sigs = append(sigs, new(SignatureMinSig).Sign(priv, msg, dstMinSig).\n            Compress())\n        pubks = append(pubks, new(PublicKeyMinSig).From(priv).Compress())\n    }\n    agProj := new(AggregateSignatureMinSig)\n    if !agProj.AggregateCompressed(sigs, true) {\n        fmt.Println(\"AggregateCompressed unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agAff := agProj.ToAffine()\n    if agAff == nil {\n        fmt.Println(\"ToAffine unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agsig = agAff.Compress()\n    return //nolint:revive\n}\n\nfunc generateBatchTestDataUncompressedMinSig(size int) (sks []*SecretKey,\n    msgs []Message, sigs []*SignatureMinSig, //nolint:unparam\n    pubks []*PublicKeyMinSig, agsig *SignatureMinSig, err bool) {\n    err = false\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! 
%d\", i))\n        msgs = append(msgs, msg)\n        priv := genRandomKeyMinSig()\n        sks = append(sks, priv)\n        sigs = append(sigs, new(SignatureMinSig).Sign(priv, msg, dstMinSig))\n        pubks = append(pubks, new(PublicKeyMinSig).From(priv))\n    }\n    agProj := new(AggregateSignatureMinSig)\n    if !agProj.Aggregate(sigs, true) {\n        fmt.Println(\"Aggregate unexpectedly returned nil\")\n        err = true\n        return //nolint:revive\n    }\n    agsig = agProj.ToAffine()\n    return //nolint:revive\n}\n\nfunc BenchmarkBatchUncompressMinSig(b *testing.B) {\n    size := 128\n    var compPoints [][]byte\n\n    for i := 0; i < size; i++ {\n        msg := Message(fmt.Sprintf(\"blst is a blast!! %d\", i))\n        p1 := HashToG1(msg, dstMinSig).ToAffine()\n        compPoints = append(compPoints, p1.Compress())\n    }\n    b.Run(\"Single\", func(b *testing.B) {\n        b.ResetTimer()\n        b.ReportAllocs()\n        var tmp SignatureMinSig\n        for i := 0; i < b.N; i++ {\n            for j := 0; j < size; j++ {\n                if tmp.Uncompress(compPoints[j]) == nil {\n                    b.Fatal(\"could not uncompress point\")\n                }\n            }\n        }\n    })\n    b.Run(\"Batch\", func(b *testing.B) {\n        b.ResetTimer()\n        b.ReportAllocs()\n        var tmp SignatureMinSig\n        for i := 0; i < b.N; i++ {\n            if tmp.BatchUncompress(compPoints) == nil {\n                b.Fatal(\"could not batch uncompress points\")\n            }\n        }\n    })\n}\n\nfunc TestSignVerifyAggregateValidatesInfinitePubkeyMinSig(t *testing.T) {\n    t.Parallel()\n    size := 20\n    sks, msgs, _, pubks, _, err :=\n      generateBatchTestDataUncompressedMinSig(size)\n    if err {\n        t.Error(\"Error generating test data\")\n        return\n    }\n\n    // All signers sign the same message\n    sigs := make([]*SignatureMinSig, size)\n    for i := range sigs {\n        sigs[i] = new(SignatureMinSig).Sign(sks[i], 
msgs[i], dstMinSig)\n    }\n\n    // Single message: Infinite pubkeys and signature\n    zeroKey := new(PublicKeyMinSig)\n    zeroSig := new(SignatureMinSig)\n    agProj := new(AggregateSignatureMinSig)\n    if !agProj.Aggregate([]*SignatureMinSig{zeroSig}, false) {\n        t.Error(\"Aggregate unexpectedly returned nil\")\n        return\n    }\n    agSig := agProj.ToAffine()\n\n    if agSig.AggregateVerify(false, []*PublicKeyMinSig{zeroKey}, false,\n                             [][]byte{msgs[0]}, dstMinSig) {\n        t.Error(\"failed to NOT verify signature\")\n    }\n\n    // Replace firstkey with infinite pubkey.\n    pubks[0] = zeroKey\n    sigs[0] = zeroSig\n    agProj = new(AggregateSignatureMinSig)\n    if !agProj.Aggregate(sigs, false) {\n        t.Error(\"Aggregate unexpectedly returned nil\")\n        return\n    }\n    agSig = agProj.ToAffine()\n\n    if agSig.AggregateVerify(false, pubks, false, msgs, dstMinSig) {\n        t.Error(\"failed to NOT verify signature\")\n    }\n}\n\nfunc TestEmptyMessageMinSig(t *testing.T) {\n    t.Parallel()\n    msg := []byte(\"\")\n    var sk_bytes = []byte {99, 64, 58, 175, 15, 139, 113, 184, 37, 222, 127,\n        204, 233, 209, 34, 8, 61, 27, 85, 251, 68, 31, 255, 214, 8, 189, 190, 71,\n        198, 16, 210, 91};\n    sk := new(SecretKey).Deserialize(sk_bytes)\n    pk := new(PublicKeyMinSig).From(sk)\n    sig := new(SignatureMinSig).Sign(sk, msg, dstMinSig)\n    if !new(SignatureMinSig).VerifyCompressed(sig.Compress(), true,\n        pk.Compress(), false, msg, dstMinSig) {\n        t.Error(\"failed to verify empty message\")\n    }\n}\n\nfunc TestEmptySignatureMinSig(t *testing.T) {\n    t.Parallel()\n    msg := []byte(\"message\")\n    var sk_bytes = []byte {99, 64, 58, 175, 15, 139, 113, 184, 37, 222, 127,\n        204, 233, 209, 34, 8, 61, 27, 85, 251, 68, 31, 255, 214, 8, 189, 190, 71,\n        198, 16, 210, 91};\n    sk := new(SecretKey).Deserialize(sk_bytes)\n    pk := new(PublicKeyMinSig).From(sk)\n    var 
emptySig []byte\n    if new(SignatureMinSig).VerifyCompressed(emptySig, true, pk.Compress(), false, msg, dstMinSig) {\n        t.Error(\"failed to NOT verify empty signature\")\n    }\n}\n\nfunc TestMultiScalarP2(t *testing.T) {\n    t.Parallel()\n    const npoints = 1027\n    scalars := make([]byte, npoints*16)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        t.Error(err.Error())\n\treturn\n    }\n    points := make([]P2, npoints)\n    refs   := make([]P2, npoints)\n    generator := P2Generator()\n    for i := range points {\n        points[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n        refs[i]   = *points[i].Mult(scalars[i*16:(i+1)*16], 128)\n        if i < 27 {\n            ref := P2s(refs[:i+1]).Add()\n            ret := P2s(points[:i+1]).Mult(scalars, 128)\n            if !ref.Equals(ret) {\n                t.Error(\"failed self-consistency multi-scalar test\")\n            }\n        }\n    }\n    ref := P2s(refs).Add()\n    ret := P2s(points).Mult(scalars, 128)\n    if !ref.Equals(ret) {\n        t.Error(\"failed self-consistency multi-scalar test\")\n    }\n}\n\nfunc BenchmarkMultiScalarP2(b *testing.B) {\n    const npoints = 200000\n    scalars := make([]byte, npoints*32)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        b.Fatal(err.Error())\n    }\n    temp := make([]P2, npoints)\n    generator := P2Generator()\n    for i := range temp {\n        temp[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n    }\n    points := P2s(temp).ToAffine()\n    run := func(points []P2Affine) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            for i:=0; i<b.N; i++ {\n                P2Affines(points).Mult(scalars, 255)\n            }\n        }\n    }\n    b.Run(fmt.Sprintf(\"%d\",npoints/8), run(points[:npoints/8]))\n    b.Run(fmt.Sprintf(\"%d\",npoints/4), run(points[:npoints/4]))\n    b.Run(fmt.Sprintf(\"%d\",npoints/2), run(points[:npoints/2]))\n    b.Run(fmt.Sprintf(\"%d\",npoints), 
run(points))\n}\n\nfunc BenchmarkToP2Affines(b *testing.B) {\n    const npoints = 32000\n    scalars := make([]byte, npoints*32)\n    _, err := rand.Read(scalars)\n    if err != nil {\n        b.Fatal(err.Error())\n    }\n    temp := make([]P2, npoints)\n    generator := P2Generator()\n    for i := range temp {\n        temp[i] = *generator.Mult(scalars[i*4:(i+1)*4])\n    }\n    scratch := make([]P2Affine, npoints)\n    run := func(size int) func(b *testing.B) {\n        return func(b *testing.B) {\n            b.Helper()\n            b.ResetTimer()\n            for i:=0; i<b.N; i++ {\n                P2s(temp[:size]).ToAffine(scratch)\n            }\n        }\n    }\n    b.Run(fmt.Sprintf(\"%d\",npoints/128), run(npoints/128))\n    b.Run(fmt.Sprintf(\"%d\",npoints/64), run(npoints/64))\n    b.Run(fmt.Sprintf(\"%d\",npoints/32), run(npoints/32))\n    b.Run(fmt.Sprintf(\"%d\",npoints/16), run(npoints/16))\n    b.Run(fmt.Sprintf(\"%d\",npoints/4), run(npoints/4))\n    b.Run(fmt.Sprintf(\"%d\",npoints), run(npoints))\n}\n"
  },
  {
    "path": "bindings/go/blst_misc.tgo",
    "content": "\nimport \"fmt\"\n\n//\n// Parse out optional arguments for sign and verify.\n//  augSingle []byte - augmentation bytes for aggregate verify (default: nil)\n//  aug [][]byte - augmentation bytes for signing (default: nil)\n//\nfunc parseOpts(optional ...interface{}) (augSingle []byte, aug [][]byte,\n                                         useHash bool, ok bool) {\n    useHash = true  // hash (true), encode (false)\n\n    for _, arg := range optional {\n        switch v := arg.(type) {\n        case []byte:\n            augSingle = v\n        case [][]byte:\n            aug = v\n        case bool:\n            useHash = v\n        default:\n            return nil, nil, useHash, false\n        }\n    }\n    return augSingle, aug, useHash, true\n}\n\n//\n// These methods are inefficient because of cgo call overhead. For this\n// reason they should be used primarily for prototyping with a goal to\n// formulate interfaces that would process multiple scalars per cgo call.\n//\nfunc (a *Scalar) MulAssign(b *Scalar) (*Scalar, bool) {\n    return a, bool(C.blst_sk_mul_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Mul(b *Scalar) (*Scalar, bool) {\n    var ret Scalar\n    return &ret, bool(C.blst_sk_mul_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) AddAssign(b *Scalar) (*Scalar, bool) {\n    return a, bool(C.blst_sk_add_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Add(b *Scalar) (*Scalar, bool) {\n    var ret Scalar\n    return &ret, bool(C.blst_sk_add_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) SubAssign(b *Scalar) (*Scalar, bool) {\n    return a, bool(C.blst_sk_sub_n_check(&a.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Sub(b *Scalar) (*Scalar, bool) {\n    var ret Scalar\n    return &ret, bool(C.blst_sk_sub_n_check(&ret.cgo, &a.cgo, &b.cgo))\n}\n\nfunc (a *Scalar) Inverse() *Scalar {\n    var ret Scalar\n    C.blst_sk_inverse(&ret.cgo, &a.cgo)\n    return &ret\n}\n\n//\n// 
Serialization/Deserialization.\n//\n\n// Scalar serdes\nfunc (s *Scalar) Serialize() []byte {\n    var out [BLST_SCALAR_BYTES]byte\n    C.blst_bendian_from_scalar((*C.byte)(&out[0]), &s.cgo)\n    return out[:]\n}\n\nfunc (s *Scalar) Deserialize(in []byte) *Scalar {\n    if len(in) != BLST_SCALAR_BYTES ||\n        !C.go_scalar_from_bendian(&s.cgo, (*C.byte)(&in[0])) {\n        return nil\n    }\n    return s\n}\n\nfunc (s *Scalar) Valid() bool {\n    return bool(C.blst_sk_check(&s.cgo))\n}\n\nfunc (s *Scalar) HashTo(msg []byte, dst []byte) bool {\n    ret := HashToScalar(msg, dst)\n    if ret != nil {\n        *s = *ret\n        return true\n    }\n    return false\n}\n\nfunc HashToScalar(msg []byte, dst []byte) *Scalar {\n    var ret Scalar\n\n    if C.go_hash_to_scalar(&ret.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n                                     ptrOrNil(dst), C.size_t(len(dst))) {\n        return &ret\n    }\n\n    return nil\n}\n\n//\n// LEndian\n//\n\nfunc (fr *Scalar) ToLEndian() []byte {\n    var arr [BLST_SCALAR_BYTES]byte\n    C.blst_lendian_from_scalar((*C.byte)(&arr[0]), &fr.cgo)\n    return arr[:]\n}\n\nfunc (fp *Fp) ToLEndian() []byte {\n    var arr [BLST_FP_BYTES]byte\n    C.blst_lendian_from_fp((*C.byte)(&arr[0]), &fp.cgo)\n    return arr[:]\n}\n\nfunc (fr *Scalar) FromLEndian(arr []byte) *Scalar {\n    nbytes := len(arr)\n    if nbytes < BLST_SCALAR_BYTES ||\n        !C.blst_scalar_from_le_bytes(&fr.cgo, (*C.byte)(&arr[0]), C.size_t(nbytes)) {\n        return nil\n    }\n    return fr\n}\n\nfunc (fp *Fp) FromLEndian(arr []byte) *Fp {\n    if len(arr) != BLST_FP_BYTES {\n        return nil\n    }\n    C.blst_fp_from_lendian(&fp.cgo, (*C.byte)(&arr[0]))\n    return fp\n}\n\n//\n// BEndian\n//\n\nfunc (fr *Scalar) ToBEndian() []byte {\n    var arr [BLST_SCALAR_BYTES]byte\n    C.blst_bendian_from_scalar((*C.byte)(&arr[0]), &fr.cgo)\n    return arr[:]\n}\n\nfunc (fp *Fp) ToBEndian() []byte {\n    var arr [BLST_FP_BYTES]byte\n    
C.blst_bendian_from_fp((*C.byte)(&arr[0]), &fp.cgo)\n    return arr[:]\n}\n\nfunc (fr *Scalar) FromBEndian(arr []byte) *Scalar {\n    nbytes := len(arr)\n    if nbytes < BLST_SCALAR_BYTES ||\n        !C.blst_scalar_from_be_bytes(&fr.cgo, (*C.byte)(&arr[0]), C.size_t(nbytes)) {\n        return nil\n    }\n    return fr\n}\n\nfunc (fp *Fp) FromBEndian(arr []byte) *Fp {\n    if len(arr) != BLST_FP_BYTES {\n        return nil\n    }\n    C.blst_fp_from_bendian(&fp.cgo, (*C.byte)(&arr[0]))\n    return fp\n}\n\n//\n// Printing\n//\n\nfunc PrintBytes(val []byte, name string) {\n    fmt.Printf(\"%s = %02x\\n\", name, val)\n}\n\nfunc (s *Scalar) Print(name string) {\n    arr := s.ToBEndian()\n    PrintBytes(arr, name)\n}\n\nfunc (p *P1Affine) Print(name string) {\n    fmt.Printf(\"%s:\\n\", name)\n    x := Fp{p.cgo.x}\n    arr := x.ToBEndian()\n    PrintBytes(arr, \"  x\")\n    y := Fp{p.cgo.y}\n    arr = y.ToBEndian()\n    PrintBytes(arr, \"  y\")\n}\n\nfunc (p *P1) Print(name string) {\n    fmt.Printf(\"%s:\\n\", name)\n    aff := p.ToAffine()\n    aff.Print(name)\n}\n\nfunc (f *Fp2) Print(name string) {\n    fmt.Printf(\"%s:\\n\", name)\n    var arr [BLST_FP_BYTES]byte\n    C.blst_bendian_from_fp((*C.byte)(&arr[0]), &f.cgo.fp[0])\n    PrintBytes(arr[:], \"    0\")\n    C.blst_bendian_from_fp((*C.byte)(&arr[0]), &f.cgo.fp[1])\n    PrintBytes(arr[:], \"    1\")\n}\n\nfunc (p *P2Affine) Print(name string) {\n    fmt.Printf(\"%s:\\n\", name)\n    x := Fp2{p.cgo.x}\n    x.Print(\"  x\")\n    y := Fp2{p.cgo.y}\n    y.Print(\"  y\")\n}\n\nfunc (p *P2) Print(name string) {\n    fmt.Printf(\"%s:\\n\", name)\n    aff := p.ToAffine()\n    aff.Print(name)\n}\n\n//\n// Equality\n//\n\nfunc (s1 *Scalar) Equals(s2 *Scalar) bool {\n    return *s1 == *s2;\n}\n\nfunc (e1 *Fp) Equals(e2 *Fp) bool {\n    return *e1 == *e2;\n}\n\nfunc (e1 *Fp2) Equals(e2 *Fp2) bool {\n    return *e1 == *e2;\n}\n\nfunc (e1 *P1Affine) Equals(e2 *P1Affine) bool {\n    return 
bool(C.blst_p1_affine_is_equal(&e1.cgo, &e2.cgo))\n}\n\nfunc (pt *P1Affine) asPtr() *C.blst_p1_affine {\n    if (pt != nil) {\n        return &pt.cgo\n    }\n\n    return nil\n}\n\nfunc (e1 *P1) Equals(e2 *P1) bool {\n    return bool(C.blst_p1_is_equal(&e1.cgo, &e2.cgo))\n}\n\nfunc (e1 *P2Affine) Equals(e2 *P2Affine) bool {\n    return bool(C.blst_p2_affine_is_equal(&e1.cgo, &e2.cgo))\n}\n\nfunc (pt *P2Affine) asPtr() *C.blst_p2_affine {\n    if (pt != nil) {\n        return &pt.cgo\n    }\n\n    return nil\n}\n\nfunc (e1 *P2) Equals(e2 *P2) bool {\n    return bool(C.blst_p2_is_equal(&e1.cgo, &e2.cgo))\n}\n\n// private thunk for testing\n\nfunc expandMessageXmd(msg []byte, dst []byte, len_in_bytes int) []byte {\n    ret := make([]byte, len_in_bytes)\n\n    C.blst_expand_message_xmd((*C.byte)(&ret[0]), C.size_t(len(ret)),\n                              ptrOrNil(msg), C.size_t(len(msg)),\n                              ptrOrNil(dst), C.size_t(len(dst)))\n    return ret\n}\n\nfunc breakdown(nbits, window, ncpus int) (nx int, ny int, wnd int) {\n\n    if nbits > window*ncpus { //nolint:nestif\n        nx = 1\n        wnd = bits.Len(uint(ncpus)/4)\n        if (window + wnd) > 18 {\n            wnd = window - wnd\n        } else {\n            wnd = (nbits / window + ncpus - 1) / ncpus;\n            if (nbits / (window + 1) + ncpus - 1) / ncpus < wnd {\n                wnd = window + 1;\n            } else {\n                wnd = window;\n            }\n        }\n    } else {\n        nx = 2\n        wnd = window-2\n        for (nbits/wnd+1)*nx < ncpus {\n            nx += 1\n            wnd = window - bits.Len(3*uint(nx)/2)\n        }\n        nx -= 1\n        wnd = window - bits.Len(3*uint(nx)/2)\n    }\n    ny = nbits/wnd + 1\n    wnd = nbits/ny + 1\n\n    return nx, ny, wnd\n}\n\nfunc pippenger_window_size(npoints int) int {\n    wbits := bits.Len(uint(npoints))\n\n    if wbits > 13 {\n        return wbits - 4\n    }\n    if wbits > 5 {\n        return wbits - 3\n   
 }\n    return 2\n}\n"
  },
  {
    "path": "bindings/go/blst_px.tgo",
    "content": "func PairingAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool,\n                            sig *P2Affine, sigGroupcheck bool, msg []byte,\n                            optional ...[]byte) int { // aug\n    var aug []byte\n    if len(optional) > 0 {\n        aug = optional[0]\n    }\n\n    r := C.blst_pairing_chk_n_aggr_pk_in_g1(&ctx[0],\n                                            PK.asPtr(), C.bool(pkValidate),\n                                            sig.asPtr(), C.bool(sigGroupcheck),\n                                            ptrOrNil(msg), C.size_t(len(msg)),\n                                            ptrOrNil(aug), C.size_t(len(aug)))\n\n    return int(r)\n}\n\nfunc PairingMulNAggregatePkInG1(ctx Pairing, PK *P1Affine, pkValidate bool,\n                                sig *P2Affine, sigGroupcheck bool,\n                                rand *Scalar, randBits int, msg []byte,\n                                optional ...[]byte) int { // aug\n    var aug []byte\n    if len(optional) > 0 {\n        aug = optional[0]\n    }\n\n    r := C.blst_pairing_chk_n_mul_n_aggr_pk_in_g1(&ctx[0],\n                                                  PK.asPtr(), C.bool(pkValidate),\n                                                  sig.asPtr(), C.bool(sigGroupcheck),\n                                                  &rand.cgo.b[0], C.size_t(randBits),\n                                                  ptrOrNil(msg), C.size_t(len(msg)),\n                                                  ptrOrNil(aug), C.size_t(len(aug)))\n\n    return int(r)\n}\n\n//\n// Serialization/Deserialization.\n//\n\n// P1 Serdes\nfunc (p1 *P1Affine) Serialize() []byte {\n    var out [BLST_P1_SERIALIZE_BYTES]byte\n    C.blst_p1_affine_serialize((*C.byte)(&out[0]), &p1.cgo)\n    return out[:]\n}\n\nfunc (p1 *P1Affine) Deserialize(in []byte) *P1Affine {\n    if len(in) != BLST_P1_SERIALIZE_BYTES {\n        return nil\n    }\n    if C.blst_p1_deserialize(&p1.cgo, 
(*C.byte)(&in[0])) != C.BLST_SUCCESS {\n        return nil\n    }\n    return p1\n}\nfunc (p1 *P1Affine) Compress() []byte {\n    var out [BLST_P1_COMPRESS_BYTES]byte\n    C.blst_p1_affine_compress((*C.byte)(&out[0]), &p1.cgo)\n    return out[:]\n}\n\nfunc (p1 *P1Affine) Uncompress(in []byte) *P1Affine {\n    if len(in) != BLST_P1_COMPRESS_BYTES {\n        return nil\n    }\n    if C.blst_p1_uncompress(&p1.cgo, (*C.byte)(&in[0])) != C.BLST_SUCCESS {\n        return nil\n    }\n    return p1\n}\n\nfunc (p1 *P1Affine) InG1() bool {\n  return bool(C.blst_p1_affine_in_g1(&p1.cgo))\n}\n\nfunc (*P1Affine) BatchUncompress(in [][]byte) []*P1Affine {\n    // Allocate space for all of the resulting points. Later we'll save pointers\n    // and return those so that the result could be used in other functions,\n    // such as MultipleAggregateVerify.\n    n := len(in)\n    points := make([]P1Affine, n)\n    pointsPtrs := make([]*P1Affine, n)\n\n    numThreads := numThreads(n)\n\n    // Each thread will determine next message to process by atomically\n    // incrementing curItem, process corresponding point, and\n    // repeat until n is exceeded. 
Each thread will send a result (true for\n    // success, false for failure) into the channel when complete.\n    resCh := make(chan bool, numThreads)\n    valid := int32(1)\n    curItem := uint32(0)\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            for atomic.LoadInt32(&valid) > 0 {\n                // Get a work item\n                work := atomic.AddUint32(&curItem, 1) - 1\n                if work >= uint32(n) {\n                    break\n                }\n                if points[work].Uncompress(in[work]) == nil {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n                pointsPtrs[work] = &points[work]\n            }\n            if atomic.LoadInt32(&valid) > 0 {\n                resCh <- true\n            } else {\n                resCh <- false\n            }\n        }()\n    }\n\n    // Collect the threads\n    result := true\n    for i := 0; i < numThreads; i++ {\n        if ! <-resCh {\n            result = false\n        }\n    }\n    if atomic.LoadInt32(&valid) == 0 || !result {\n        return nil\n    }\n    return pointsPtrs\n}\n\nfunc (p1 *P1) Serialize() []byte {\n    var out [BLST_P1_SERIALIZE_BYTES]byte\n    C.blst_p1_serialize((*C.byte)(&out[0]), &p1.cgo)\n    return out[:]\n}\nfunc (p1 *P1) Compress() []byte {\n    var out [BLST_P1_COMPRESS_BYTES]byte\n    C.blst_p1_compress((*C.byte)(&out[0]), &p1.cgo)\n    return out[:]\n}\n\nfunc (p1 *P1) MultAssign(scalarIf interface{}, optional ...int) *P1 {\n    var nbits int\n    var scalar *C.byte\n    switch val := scalarIf.(type) {\n    case []byte:\n        scalar = (*C.byte)(&val[0])\n        nbits = len(val)*8\n    case *Scalar:\n        scalar = &val.cgo.b[0]\n        nbits = 255\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n    if len(optional) > 0 {\n        nbits = optional[0]\n    }\n    C.blst_p1_mult(&p1.cgo, &p1.cgo, scalar, C.size_t(nbits))\n    return p1\n}\n\nfunc 
(p1 *P1) Mult(scalarIf interface{}, optional ...int) *P1 {\n    ret := *p1\n    return ret.MultAssign(scalarIf, optional...)\n}\n\nfunc (p1 *P1) AddAssign(pointIf interface{}) *P1 {\n    switch val := pointIf.(type) {\n    case *P1:\n        C.blst_p1_add_or_double(&p1.cgo, &p1.cgo, &val.cgo)\n    case *P1Affine:\n        C.blst_p1_add_or_double_affine(&p1.cgo, &p1.cgo, &val.cgo)\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n    return p1\n}\n\nfunc (p1 *P1) Add(pointIf interface{}) *P1 {\n    ret := *p1\n    return ret.AddAssign(pointIf)\n}\n\nfunc (p1 *P1) SubAssign(pointIf interface{}) *P1 {\n    var x *C.blst_fp\n    var affine C.bool\n    switch val := pointIf.(type) {\n    case *P1:\n        x = &val.cgo.x\n        affine = false\n    case *P1Affine:\n        x = &val.cgo.x\n        affine = true\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n    C.go_p1_sub_assign(&p1.cgo, x, affine)\n    return p1\n}\n\nfunc (p1 *P1) Sub(pointIf interface{}) *P1 {\n    ret := *p1\n    return ret.SubAssign(pointIf)\n}\n\nfunc P1Generator() *P1 {\n    return &cgo_p1Generator\n}\n\n// 'acc += point * scalar', passing 'nil' for 'point' means \"use the\n//                          group generator point\"\nfunc (acc *P1) MultNAccumulate(pointIf interface{}, scalarIf interface{},\n                               optional ...int) *P1 {\n    var x *C.blst_fp\n    var affine C.bool\n    if pointIf != nil {\n        switch val := pointIf.(type) {\n        case *P1:\n            x = &val.cgo.x\n            affine = false\n        case *P1Affine:\n            x = &val.cgo.x\n            affine = true\n        default:\n            panic(fmt.Sprintf(\"unsupported type %T\", val))\n        }\n    }\n    var nbits int\n    var scalar *C.byte\n    switch val := scalarIf.(type) {\n    case []byte:\n        scalar = (*C.byte)(&val[0])\n        nbits = len(val)*8\n    case *Scalar:\n        scalar = &val.cgo.b[0]\n        nbits 
= 255\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n    if len(optional) > 0 {\n        nbits = optional[0]\n    }\n    C.go_p1_mult_n_acc(&acc.cgo, x, affine, scalar, C.size_t(nbits))\n    return acc\n}\n\n//\n// Affine\n//\n\nfunc (p *P1) ToAffine() *P1Affine {\n    var pa P1Affine\n    C.blst_p1_to_affine(&pa.cgo, &p.cgo)\n    return &pa\n}\n\nfunc (p *P1) FromAffine(pa *P1Affine) {\n    C.blst_p1_from_affine(&p.cgo, &pa.cgo)\n}\n\n//\n// Hash\n//\nfunc HashToG1(msg []byte, dst []byte,\n        optional ...[]byte) *P1 { // aug\n    var q P1\n\n    var aug []byte\n    if len(optional) > 0 {\n        aug = optional[0]\n    }\n\n    C.blst_hash_to_g1(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n                              ptrOrNil(dst), C.size_t(len(dst)),\n                              ptrOrNil(aug), C.size_t(len(aug)))\n    return &q\n}\n\nfunc EncodeToG1(msg []byte, dst []byte,\n        optional ...[]byte) *P1 { // aug\n    var q P1\n\n    var aug []byte\n    if len(optional) > 0 {\n        aug = optional[0]\n    }\n\n    C.blst_encode_to_g1(&q.cgo, ptrOrNil(msg), C.size_t(len(msg)),\n                                ptrOrNil(dst), C.size_t(len(dst)),\n                                ptrOrNil(aug), C.size_t(len(aug)))\n    return &q\n}\n\n//\n// Multi-point/scalar operations\n//\n\nfunc P1sToAffine(points []*P1, optional ...int) P1Affines {\n    var npoints int\n    if len(optional) > 0 {\n        npoints = optional[0]\n    } else {\n        npoints = len(points)\n    }\n    ret := make([]P1Affine, npoints)\n    _cgoCheckPointer := func(...interface{}) {}\n    C.blst_p1s_to_affine(&ret[0].cgo, (**C.blst_p1)(unsafe.Pointer(&points[0])),\n                         C.size_t(npoints))\n    return ret\n}\n\nfunc (points P1s) ToAffine(optional ...P1Affines) P1Affines {\n    npoints := len(points)\n    var ret P1Affines\n\n    if len(optional) > 0 {  // used in benchmark\n        ret = optional[0]\n        if len(ret) < npoints {\n      
      panic(\"npoints mismatch\")\n        }\n    } else {\n        ret = make([]P1Affine, npoints)\n    }\n\n    if maxProcs < 2 || npoints < 768 {\n        C.go_p1slice_to_affine(&ret[0].cgo, &points[0].cgo, C.size_t(npoints))\n        return ret\n    }\n\n    nslices := (npoints + 511) / 512\n    if nslices > maxProcs {\n        nslices = maxProcs\n    }\n    delta, rem := npoints/nslices + 1, npoints%nslices\n\n    var wg sync.WaitGroup\n    wg.Add(nslices)\n    for x := 0; x < npoints; x += delta {\n        if rem == 0 {\n            delta -= 1\n        }\n        rem -= 1\n        go func(out *P1Affine, inp *P1, delta int) {\n            C.go_p1slice_to_affine(&out.cgo, &inp.cgo, C.size_t(delta))\n            wg.Done()\n        }(&ret[x], &points[x], delta)\n    }\n    wg.Wait()\n\n    return ret\n}\n\n//\n// Batch addition\n//\n\nfunc P1AffinesAdd(points []*P1Affine, optional ...int) *P1 {\n    var npoints int\n    if len(optional) > 0 {\n        npoints = optional[0]\n    } else {\n        npoints = len(points)\n    }\n    var ret P1\n    _cgoCheckPointer := func(...interface{}) {}\n    C.blst_p1s_add(&ret.cgo, (**C.blst_p1_affine)(unsafe.Pointer(&points[0])),\n                             C.size_t(npoints))\n    return &ret\n}\n\nfunc (points P1Affines) Add() *P1 {\n    npoints := len(points)\n    if maxProcs < 2 || npoints < 768 {\n        var ret P1\n        C.go_p1slice_add(&ret.cgo, &points[0].cgo, C.size_t(npoints))\n        return &ret\n    }\n\n    nslices := (npoints + 511) / 512\n    if nslices > maxProcs {\n        nslices = maxProcs\n    }\n    delta, rem := npoints/nslices + 1, npoints%nslices\n\n    msgs := make(chan P1, nslices)\n    for x := 0; x < npoints; x += delta {\n        if rem == 0 {\n            delta -= 1\n        }\n        rem -= 1\n        go func(points *P1Affine, delta int) {\n            var ret P1\n            C.go_p1slice_add(&ret.cgo, &points.cgo, C.size_t(delta))\n            msgs <- ret\n        }(&points[x], delta)\n   
 }\n\n    ret := <- msgs\n    for i := 1; i < nslices; i++ {\n        msg := <- msgs\n        C.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &msg.cgo)\n    }\n    return &ret\n}\n\nfunc (points P1s) Add() *P1 {\n    return points.ToAffine().Add()\n}\n\n//\n// Multi-scalar multiplication\n//\n\nfunc P1AffinesMult(pointsIf interface{}, scalarsIf interface{}, nbits int) *P1 {\n    var npoints int\n    switch val := pointsIf.(type) {\n    case []*P1Affine:\n        npoints = len(val)\n    case []P1Affine:\n        npoints = len(val)\n    case P1Affines:\n        npoints = len(val)\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n\n    nbytes := (nbits+7)/8\n    var scalars []*C.byte\n    switch val := scalarsIf.(type) {\n    case []byte:\n        if len(val) < npoints*nbytes {\n            return nil\n        }\n    case [][]byte:\n        if len(val) < npoints {\n            return nil\n        }\n        scalars = make([]*C.byte, npoints)\n        for i := range scalars {\n            scalars[i] = (*C.byte)(&val[i][0])\n        }\n    case []Scalar:\n        if len(val) < npoints {\n            return nil\n        }\n        if nbits <= 248 {\n            scalars = make([]*C.byte, npoints)\n            for i := range scalars {\n                scalars[i] = &val[i].cgo.b[0]\n            }\n        }\n    case []*Scalar:\n        if len(val) < npoints {\n            return nil\n        }\n        scalars = make([]*C.byte, npoints)\n        for i := range scalars {\n            scalars[i] = &val[i].cgo.b[0]\n        }\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\",val))\n    }\n\n    numThreads := numThreads(0)\n\n    if numThreads < 2 {\n        sz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(C.size_t(npoints)))/8\n        scratch := make([]uint64, sz)\n\n        pointsBySlice := [2]*C.blst_p1_affine{nil, nil}\n        var p_points **C.blst_p1_affine\n        switch val := pointsIf.(type) {\n        case []*P1Affine:\n   
         p_points = (**C.blst_p1_affine)(unsafe.Pointer(&val[0]))\n        case []P1Affine:\n            pointsBySlice[0] = &val[0].cgo\n            p_points = &pointsBySlice[0]\n        case P1Affines:\n            pointsBySlice[0] = &val[0].cgo\n            p_points = &pointsBySlice[0]\n        default: // type is already vetted\n        }\n\n        scalarsBySlice := [2]*C.byte{nil, nil}\n        var p_scalars **C.byte\n        switch val := scalarsIf.(type) {\n        case []byte:\n            scalarsBySlice[0] = (*C.byte)(&val[0])\n            p_scalars = &scalarsBySlice[0]\n        case [][]byte:\n            p_scalars = &scalars[0]\n        case []Scalar:\n            if nbits > 248 {\n                scalarsBySlice[0] = &val[0].cgo.b[0]\n                p_scalars = &scalarsBySlice[0]\n            } else {\n                p_scalars = &scalars[0]\n            }\n        case []*Scalar:\n            p_scalars = &scalars[0]\n        default: // type is already vetted\n        }\n\n        var ret P1\n        _cgoCheckPointer := func(...interface{}) {}\n        C.blst_p1s_mult_pippenger(&ret.cgo, p_points, C.size_t(npoints),\n                                            p_scalars, C.size_t(nbits),\n                                            (*C.limb_t)(&scratch[0]))\n\n        for i := range(scalars) {\n            scalars[i] = nil\n        }\n\n        return &ret\n    }\n\n    if npoints < 32 {\n        if numThreads > npoints {\n            numThreads = npoints\n        }\n\n        curItem := uint32(0)\n        msgs := make(chan P1, numThreads)\n\n        for tid := 0; tid < numThreads; tid++ {\n            go func() {\n                var acc P1\n\n                for {\n                    workItem := int(atomic.AddUint32(&curItem, 1) - 1)\n                    if workItem >= npoints {\n                        break\n                    }\n\n                    var point *P1Affine\n                    switch val := pointsIf.(type) {\n                    
case []*P1Affine:\n                        point = val[workItem]\n                    case []P1Affine:\n                        point = &val[workItem]\n                    case P1Affines:\n                        point = &val[workItem]\n                    default: // type is already vetted\n                    }\n\n                    var scalar *C.byte\n                    switch val := scalarsIf.(type) {\n                    case []byte:\n                        scalar = (*C.byte)(&val[workItem*nbytes])\n                    case [][]byte:\n                        scalar = scalars[workItem]\n                    case []Scalar:\n                        if nbits > 248 {\n                            scalar = &val[workItem].cgo.b[0]\n                        } else {\n                            scalar = scalars[workItem]\n                        }\n                    case []*Scalar:\n                        scalar = scalars[workItem]\n                    default: // type is already vetted\n                    }\n\n                    C.go_p1_mult_n_acc(&acc.cgo, &point.cgo.x, true,\n                                                 scalar, C.size_t(nbits))\n                }\n\n                msgs <- acc\n            }()\n        }\n\n        ret := <-msgs\n        for tid := 1; tid < numThreads; tid++ {\n            point := <- msgs\n            C.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &point.cgo);\n        }\n\n        for i := range(scalars) {\n            scalars[i] = nil\n        }\n\n        return &ret\n    }\n\n    // this is sizeof(scratch[0])\n    sz := int(C.blst_p1s_mult_pippenger_scratch_sizeof(0))/8\n\n    nx, ny, window := breakdown(nbits, pippenger_window_size(npoints),\n                                numThreads)\n\n    // |grid[]| holds \"coordinates\" and place for result\n    grid := make([]struct { x, dx, y, dy int\n                            point P1 }, nx*ny)\n\n    dx := npoints/nx\n    y := window*(ny-1)\n    total := 0\n    for ; total < 
nx; total++ {\n        grid[total].x = total*dx\n        grid[total].dx = dx\n        grid[total].y = y\n        grid[total].dy = nbits - y\n    }\n    grid[total-1].dx = npoints - grid[total-1].x\n\n    for y > 0 {\n        y -= window\n        for i := 0; i < nx; i++ {\n            grid[total].x = grid[i].x\n            grid[total].dx = grid[i].dx\n            grid[total].y = y\n            grid[total].dy = window\n            total++\n        }\n    }\n\n    if numThreads > total {\n        numThreads = total\n    }\n\n    msgsCh := make(chan int, ny)\n    rowSync := make([]int32, ny)    // count up to |nx|\n    curItem := int32(0)\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            scratch := make([]uint64, sz << uint(window-1))\n            pointsBySlice := [2]*C.blst_p1_affine{nil, nil}\n            scalarsBySlice := [2]*C.byte{nil, nil}\n            _cgoCheckPointer := func(...interface{}) {}\n\n            for {\n                workItem := atomic.AddInt32(&curItem, 1) - 1\n                if int(workItem) >= total {\n                    break\n                }\n\n                x := grid[workItem].x\n                y := grid[workItem].y\n\n                var p_points **C.blst_p1_affine\n                switch val := pointsIf.(type) {\n                case []*P1Affine:\n                    p_points = (**C.blst_p1_affine)(unsafe.Pointer(&val[x]))\n                case []P1Affine:\n                    pointsBySlice[0] = &val[x].cgo\n                    p_points = &pointsBySlice[0]\n                case P1Affines:\n                    pointsBySlice[0] = &val[x].cgo\n                    p_points = &pointsBySlice[0]\n                default: // type is already vetted\n                }\n\n                var p_scalars **C.byte\n                switch val := scalarsIf.(type) {\n                case []byte:\n                    scalarsBySlice[0] = (*C.byte)(&val[x*nbytes])\n                    p_scalars = &scalarsBySlice[0]\n         
       case [][]byte:\n                    p_scalars = &scalars[x]\n                case []Scalar:\n                    if nbits > 248 {\n                        scalarsBySlice[0] = &val[x].cgo.b[0]\n                        p_scalars = &scalarsBySlice[0]\n                    } else {\n                        p_scalars = &scalars[x]\n                    }\n                case []*Scalar:\n                    p_scalars = &scalars[x]\n                default: // type is already vetted\n                }\n\n                C.blst_p1s_tile_pippenger(&grid[workItem].point.cgo,\n                                          p_points, C.size_t(grid[workItem].dx),\n                                          p_scalars, C.size_t(nbits),\n                                          (*C.limb_t)(&scratch[0]),\n                                          C.size_t(y), C.size_t(window));\n\n                if atomic.AddInt32(&rowSync[y/window], 1) == int32(nx) {\n                    msgsCh <- y     // \"row\" is done\n                } else {\n                    runtime.Gosched()   // be nice to the application\n                }\n            }\n\n            pointsBySlice[0] = nil\n            scalarsBySlice[0] = nil\n        }()\n    }\n\n    var ret P1\n    rows := make([]bool, ny)\n    row := 0                        // actually index in |grid[]|\n    for i := 0; i < ny; i++ {       // we expect |ny| messages, one per \"row\"\n        y := <- msgsCh\n        rows[y/window] = true       // mark the \"row\"\n        for grid[row].y == y {      // if it's current \"row\", process it\n            for row < total && grid[row].y == y {\n                C.blst_p1_add_or_double(&ret.cgo, &ret.cgo, &grid[row].point.cgo)\n                row++\n            }\n            if y == 0 {\n                break               // one can as well 'return &ret' here\n            }\n            for j := 0; j < window; j++ {\n                C.blst_p1_double(&ret.cgo, &ret.cgo)\n            }\n            y 
-= window\n            if !rows[y/window] {    // see if next \"row\" was marked already\n                break\n            }\n        }\n    }\n\n    for i := range(scalars) {\n        scalars[i] = nil\n    }\n\n    return &ret\n}\n\nfunc (points P1Affines) Mult(scalarsIf interface{}, nbits int) *P1 {\n    return P1AffinesMult(points, scalarsIf, nbits)\n}\n\nfunc (points P1s) Mult(scalarsIf interface{}, nbits int) *P1 {\n    return points.ToAffine().Mult(scalarsIf, nbits)\n}\n\n//\n// Group-check\n//\n\nfunc P1AffinesValidate(pointsIf interface{}) bool {\n    var npoints int\n    switch val := pointsIf.(type) {\n    case []*P1Affine:\n        npoints = len(val)\n    case []P1Affine:\n        npoints = len(val)\n    case P1Affines:\n        npoints = len(val)\n    default:\n        panic(fmt.Sprintf(\"unsupported type %T\", val))\n    }\n\n    numThreads := numThreads(npoints)\n\n    if numThreads < 2 {\n        for i := 0; i < npoints; i++ {\n            var point *P1Affine\n\n            switch val := pointsIf.(type) {\n            case []*P1Affine:\n                point = val[i]\n            case []P1Affine:\n                point = &val[i]\n            case P1Affines:\n                point = &val[i]\n            default:\n                panic(fmt.Sprintf(\"unsupported type %T\", val))\n            }\n\n            if !C.go_p1_affine_validate(&point.cgo, true) {\n                return false\n            }\n        }\n\n        return true\n    }\n\n    valid := int32(1)\n    curItem := uint32(0)\n\n    var wg sync.WaitGroup\n    wg.Add(numThreads)\n\n    for tid := 0; tid < numThreads; tid++ {\n        go func() {\n            for atomic.LoadInt32(&valid) != 0 {\n                work := atomic.AddUint32(&curItem, 1) - 1\n                if work >= uint32(npoints) {\n                    break\n                }\n\n                var point *P1Affine\n\n                switch val := pointsIf.(type) {\n                case []*P1Affine:\n                    
point = val[work]\n                case []P1Affine:\n                    point = &val[work]\n                case P1Affines:\n                    point = &val[work]\n                default:\n                    panic(fmt.Sprintf(\"unsupported type %T\", val))\n                }\n\n                if !C.go_p1_affine_validate(&point.cgo, true) {\n                    atomic.StoreInt32(&valid, 0)\n                    break\n                }\n            }\n\n            wg.Done()\n        }()\n    }\n\n    wg.Wait()\n\n    return atomic.LoadInt32(&valid) != 0\n}\n\nfunc (points P1Affines) Validate() bool {\n    return P1AffinesValidate(points)\n}\n"
  },
  {
    "path": "bindings/go/blst_wasm.go",
    "content": "//go:build wasm\npackage not_supported\n"
  },
  {
    "path": "bindings/go/cgo_assembly.S",
    "content": "#include \"assembly.S\"\n"
  },
  {
    "path": "bindings/go/cgo_server.c",
    "content": "#include \"server.c\"\n"
  },
  {
    "path": "bindings/go/generate.py",
    "content": "#!/usr/bin/env python3\n\nimport os\nimport sys\nimport re\nimport subprocess\n\nhere = re.split(r'/(?=[^/]*$)', sys.argv[0])\nif len(here) > 1:\n  os.chdir(here[0])\n\nfor dir in re.split(r':', os.getenv(\"GOPATH\")):\n  goimports = dir + \"/bin/goimports\"\n  if os.path.isfile(goimports) and os.access(goimports, os.X_OK):\n    break\n  goimports = None\n\nif goimports is None:\n  version = subprocess.check_output([\"go\", \"version\"]).decode('ascii')\n  v = re.search(r'version go([0-9]+\\.[0-9]+)', version)\n  if not v:\n    raise OSError(2, \"unparseable output from 'go version'\")\n  if float(v.group(1)) < 1.17:\n    advice = \"'go get golang.org/x/tools/cmd/goimports'\"\n  else:\n    advice = \"'go install golang.org/x/tools/cmd/goimports@latest'\"\n  print(\"'goimports' is not found on $GOPATH, install with\", file=sys.stderr)\n  print(advice,                                              file=sys.stderr)\n  sys.exit(1)\n\noutFile = 'blst.go'\n\n\ndef concatFile(fout, fin, removeImports):\n  for line in fin:\n    if removeImports and 'import' in line:\n      while ')' not in line:\n        line = fin.readline()\n      continue\n    print(line, file=fout, end='')\n\n\ndef remap(fout, fin, mapping, dont_touch, removeImports):\n  for line in fin:\n    if removeImports and 'import' in line:\n      while ')' not in line:\n        line = fin.readline()\n      continue\n    for (a, b) in dont_touch:\n      line = line.replace(a, b)\n\n    for (a, b) in mapping:\n      line = line.replace(a, a+\"_tmp\")\n      line = line.replace(b, b+\"_tmp\")\n      line = line.replace(a+\"_tmp\", b)\n      line = line.replace(b+\"_tmp\", a)\n\n    for (a, b) in dont_touch:\n      line = line.replace(b, a)\n    print(line, file=fout, end='')\n\nfout = open(outFile, \"w\")\n\nprint(\"// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fout)\nprint(\"// DO NOT MODIFY THIS FILE!!\",                        file=fout)\nprint(\"// The file is generated from *.tgo 
by \" + here[-1],  file=fout)\nprint(\"// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fout)\n\nfin = open('blst.tgo', \"r\")\nconcatFile(fout, fin, False)\nfin.close()\n\n# min-pk\nprint(\"//\", file=fout)\nprint(\"// MIN-PK\", file=fout)\nprint(\"//\", file=fout)\n\nfin = open('blst_minpk.tgo', \"r\")\nconcatFile(fout, fin, True)\nfin.close()\n\n# These are strings that overlap with the mapping names but we don't\n# actually want to change. The second value should be a unique string.\ndont_touch = (('Fp12', 'foo1234'),)\n\n# We're going to swap these names to get from min-pk to min-sig\nmapping = [('P1', 'P2'),\n           ('p1', 'p2'),\n           ('Fp', 'Fp2'),\n           ('C.blst_fp', 'C.blst_fp2'),\n           ('G1', 'G2'),\n           ('g1', 'g2')\n          ]\n\n# min-sig\nprint(\"//\", file=fout)\nprint(\"// MIN-SIG\", file=fout)\nprint(\"//\", file=fout)\n\nwith open('blst_minpk.tgo', \"r\") as fin:\n  remap(fout, fin, mapping, dont_touch, True)\n\n# serdes and other functions\nfin = open('blst_px.tgo', \"r\")\nconcatFile(fout, fin, True)\nfin.close()\n\nwith open('blst_px.tgo', \"r\") as fin:\n  remap(fout, fin, mapping, dont_touch, True)\n\n# final code\nfin = open('blst_misc.tgo', \"r\")\nconcatFile(fout, fin, True)\nfin.close()\n\nfout.close()\n\n# Use goimports to generate the import list\nos.system(goimports + \" -w blst.go\")\n\n# Generate min-sig tests\nfout = open('blst_minsig_test.go', \"w\")\nprint(\"// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fout)\nprint(\"// DO NOT EDIT THIS FILE!!\",                          file=fout)\nprint(\"// The file is generated from blst_minpk_test.go by \" + here[-1],  file=fout)\nprint(\"// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fout)\n\nmapping.append(('MinPk', 'MinSig'))\n\nwith open('blst_minpk_test.go', \"r\") as fin:\n  remap(fout, fin, mapping, dont_touch, False)\nfout.close()\n"
  },
  {
    "path": "bindings/go/rb_tree.go",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n/*\n * Reimplement rb_tree.c, because C.call overhead is too high in\n * comparison to tree insertion subroutine.\n */\n\npackage blst\n\nimport \"bytes\"\n\n/*\n * Red-black tree tailored for uniqueness test. Amount of messages to be\n * checked is known prior context initialization, implementation is\n * insert-only, failure is returned if message is already in the tree.\n */\n\nconst red, black bool = true, false\n\ntype node struct {\n    leafs  [2]*node\n    data   *[]byte\n    colour bool\n}\n\ntype rbTree struct {\n    root   *node\n    nnodes uint\n    nodes  []node\n}\n\nfunc (tree *rbTree) insert(data *[]byte) bool {\n    var nodes [64]*node     /* visited nodes    */\n    var dirs  [64]byte      /* taken directions */\n    var k uint              /* walked distance  */\n\n    for p := tree.root; p != nil && k < 64; k++ {\n        cmp := bytes.Compare(*data, *p.data)\n\n        if cmp == 0 {\n            return false    /* already in tree, no insertion */\n        }\n\n        /* record the step */\n        nodes[k] = p\n        if cmp > 0 {\n            dirs[k] = 1\n        } else {\n            dirs[k] = 0\n        }\n        p = p.leafs[dirs[k]]\n    }\n\n    if k == 64 {\n        return false\n    }\n\n    /* allocate new node */\n    z := &tree.nodes[tree.nnodes]; tree.nnodes++\n    z.data = data\n    z.colour = red\n\n    /* graft |z| */\n    if k > 0 {\n        nodes[k-1].leafs[dirs[k-1]] = z\n    } else {\n        tree.root = z\n    }\n\n    /* re-balance |tree| */\n    for k >= 2 /* && IS_RED(y = nodes[k-1]) */ {\n        y := nodes[k-1]\n        if y.colour == black  { //nolint:staticcheck\n            break\n        }\n\n        ydir := dirs[k-2]\n        x := nodes[k-2]         /* |z|'s grandparent    */\n        s := x.leafs[ydir^1]    /* |z|'s uncle          */\n\n       
 if s != nil && s.colour == red { //nolint:staticcheck,revive\n            x.colour = red\n            y.colour = black\n            s.colour = black\n            k -= 2\n        } else {\n            if dirs[k-1] != ydir {\n                /*    |        |\n                 *    x        x\n                 *   / \\        \\\n                 *  y   s -> z   s\n                 *   \\      /\n                 *    z    y\n                 *   /      \\\n                 *  ?        ?\n                 */\n                t := y\n                y = y.leafs[ydir^1]\n                t.leafs[ydir^1] = y.leafs[ydir]\n                y.leafs[ydir] = t\n            }\n\n            /*      |        |\n             *      x        y\n             *       \\      / \\\n             *    y   s -> z   x\n             *   / \\          / \\\n             *  z   ?        ?   s\n             */\n            x.leafs[ydir] = y.leafs[ydir^1]\n            y.leafs[ydir^1] = x\n\n            x.colour = red\n            y.colour = black\n\n            if k > 2 {\n                nodes[k-3].leafs[dirs[k-3]] = y\n            } else {\n                tree.root = y\n            }\n\n            break\n        }\n    }\n\n    tree.root.colour = black\n\n    return true\n}\n\nfunc Uniq(msgs []Message) bool {\n    n := len(msgs)\n\n    if n == 1 { //nolint:staticcheck\n        return true\n    } else if n == 2 {\n        return !bytes.Equal(msgs[0], msgs[1])\n    }\n\n    var tree rbTree\n    tree.nodes = make([]node, n)\n\n    for i := 0; i < n; i++ {\n        if !tree.insert(&msgs[i]) {\n            return false\n        }\n    }\n\n    return true\n}\n"
  },
  {
    "path": "bindings/rust/Cargo.toml",
    "content": "[package]\nname = \"blst\"\nversion = \"0.3.16\"\nauthors = [\"sean-sn <sean@supranational.net>\"]\nedition = \"2018\"\nlicense = \"Apache-2.0\"\ndescription = \"Bindings for blst BLS12-381 library\"\nrepository = \"https://github.com/supranational/blst\"\nreadme = \"README.md\"\ncategories = [\"cryptography\"]\nkeywords = [\"crypto\", \"bls\", \"signature\", \"asm\", \"wasm\"]\ninclude = [\n    \"**/*.rs\",\n    \"/Cargo.toml\",\n    \"/README.md\",\n    \"/rustfmt.toml\",\n    \"/blst/src/*.c\",\n    \"/blst/src/*.h*\",\n    \"/blst/build/**\",\n    \"/blst/bindings/blst.h\",\n    \"/blst/bindings/blst_aux.h\",\n    \"/blst/bindings/blst.hpp\",\n]\nlinks = \"blst\"\n\n[features]\n# By default, compile with ADX extension if the host supports it.\n# Binary can be executed on systems similar to the host.\ndefault = []\n# Compile in portable mode, without ISA extensions.\n# Binary can be executed on all systems.\nportable = []\n# Enable ADX even if the host CPU doesn't support it.\n# Binary can be executed on Broadwell+ and Ryzen+ systems.\nforce-adx = []\n# Suppress multi-threading.\n# Engaged on wasm32 target architecture automatically.\nno-threads = []\n# Add support for serializing SecretKey, not suitable for production.\nserde-secret = [\"serde\"]\n\n[build-dependencies]\ncc = \"1.0\"\n[target.'cfg(target_env = \"msvc\")'.build-dependencies]\nglob = \"0.3\"\n\n[dependencies]\nzeroize = { version = \"^1.1\", features = [\"zeroize_derive\"] }\nserde = { version = \"1.0.152\", optional = true }\n\n[target.'cfg(not(any(target_arch=\"wasm32\", target_os=\"none\", target_os=\"unknown\", target_os=\"uefi\")))'.dependencies]\nthreadpool = \"^1.8.1\"\n\n[dev-dependencies]\nrand = \"0.8\"\nrand_chacha = \"0.3\"\nrmp-serde = \"1.1.1\"\n# Uncomment if you want to execute the test suite with Rust 1.56 through 1.64.\n#byteorder = \"=1.4.3\"\n#rmp = \"=0.8.12\"\n#ppv-lite86 = \"=0.2.17\"\n\n[target.'cfg(any(unix, windows))'.dev-dependencies]\ncriterion = 
\"0.3\"\n\n[[bench]]\nname = \"blst_benches\"\nharness = false\n\n[profile.release]\n#opt-level = 3\n\n[badges]\nmaintenance = { status = \"actively-developed\" }\n"
  },
  {
    "path": "bindings/rust/README.md",
    "content": "# blst [![Crates.io](https://img.shields.io/crates/v/blst.svg)](https://crates.io/crates/blst)\n\nThe `blst` crate provides a rust interface to the blst BLS12-381 signature library.\n\n## Build\n[bindgen](https://github.com/rust-lang/rust-bindgen) is used to generate FFI bindings to blst.h. Then [build.rs](https://github.com/supranational/blst/blob/master/bindings/rust/build.rs) invokes C compiler to compile everything into libblst.a within the rust target build area. On Linux it's possible to choose compiler by setting `CC` environment variable.\n\nEverything can be built and run with the typical cargo commands:\n\n```\ncargo test\ncargo bench\n```\n\nIf the target application crashes with an \"illegal instruction\" exception [after copying to an older system], activate `portable` feature when building blst. Conversely, if you compile on an older Intel system, but will execute the binary on a newer one, consider instead activating <nobr>`force-adx`</nobr> feature. Though keep in mind that [cc](https://crates.io/crates/cc) passes the value of `CFLAGS` environment variable to the C compiler, and if set to contain specific flags, it can interfere with feature selection. <nobr>`-D__BLST_PORTABLE__`</nobr> and <nobr>`-D__ADX__`</nobr> are the said features' equivalents.\n\nTo compile for WebAssembly, your clang has to recognize `--target=wasm32`. Alternatively you can build your project with `CC` environment variable set to `emcc`, the [Emscripten compiler](https://emscripten.org), and `AR` set to `emar`, naturally, with both commands available on your `PATH`.\n\nWhile `cargo test`'s dependencies happen to require at least Rust 1.65, the library by itself can be compiled with earlier compiler versions. Though in order to use Rust version prior 1.56 you would need to pin`zeroize` to \"=1.3.0\" and `zeroize_derive` to \"=1.3.3\" in **your** project Cargo.toml. Even `cc` might require pinning to \"=1.0.79\". 
And if you find yourself with Rust 1.56 through 1.64 as the only option and want to execute `cargo test` you'd need to pin some of `[dev-dependencies]` versions in **this** project's Cargo.toml by uncommenting the following lines and commenting out `criterion`:\n\n```\nbyteorder = \"=1.4.3\"\nppv-lite86 = \"=0.2.17\"\nrmp = \"=0.8.12\"\n\n[target.'cfg(any(unix, windows))'.dev-dependencies]\n#criterion = \"0.3\"\n```\n\n## Usage\nThere are two primary modes of operation that can be chosen based on declaration path:\n\nFor minimal-pubkey-size operations:\n```rust\nuse blst::min_pk::*;\n```\n\nFor minimal-signature-size operations:\n```rust\nuse blst::min_sig::*;\n```\n\nThere are five structs with inherent implementations that provide the BLS12-381 signature functionality.\n```\nSecretKey\nPublicKey\nAggregatePublicKey\nSignature\nAggregateSignature\n```\n\nA simple example for generating a key, signing a message, and verifying the message:\n```rust\nuse blst::min_pk::SecretKey;\nuse rand::RngCore;\n\nlet mut rng = rand::thread_rng();\nlet mut ikm = [0u8; 32];\nrng.fill_bytes(&mut ikm);\n\nlet sk = SecretKey::key_gen(&ikm, &[]).unwrap();\nlet pk = sk.sk_to_pk();\n\nlet dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\";\nlet msg = b\"blst is such a blast\";\nlet sig = sk.sign(msg, dst, &[]);\n\nlet err = sig.verify(true, msg, dst, &[], &pk, true);\nassert_eq!(err, blst::BLST_ERROR::BLST_SUCCESS);\n```\n\nSee the tests in src/lib.rs and benchmarks in benches/blst_benches.rs for further examples of usage.\n"
  },
  {
    "path": "bindings/rust/benches/blst_benches.rs",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\nuse blst::*;\n\n// Benchmark min_pk\nuse blst::min_pk::*;\n\nuse criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};\nuse rand::{RngCore, SeedableRng};\nuse rand_chacha::ChaCha20Rng;\n\nstruct BenchData {\n    sk: SecretKey,\n    pk: PublicKey,\n    msg: Vec<u8>,\n    dst: Vec<u8>,\n    sig: Signature,\n}\n\nfn gen_bench_data(rng: &mut rand_chacha::ChaCha20Rng) -> BenchData {\n    let msg_len = (rng.next_u64() & 0x3F) + 1;\n    let mut msg = vec![0u8; msg_len as usize];\n    rng.fill_bytes(&mut msg);\n\n    gen_bench_data_for_msg(rng, &msg)\n}\n\nfn gen_bench_data_for_msg(\n    rng: &mut rand_chacha::ChaCha20Rng,\n    msg: &Vec<u8>,\n) -> BenchData {\n    let mut ikm = [0u8; 32];\n    rng.fill_bytes(&mut ikm);\n\n    let sk = SecretKey::key_gen(&ikm, &[]).unwrap();\n    let pk = sk.sk_to_pk();\n    let dst = \"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\"\n        .as_bytes()\n        .to_owned();\n\n    let sig = sk.sign(&msg, &dst, &[]);\n\n    let bd = BenchData {\n        sk,\n        pk,\n        dst,\n        msg: msg.clone(),\n        sig,\n    };\n    bd\n}\n\nfn bench_verify_multi_aggregate(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"verify_multi_aggregate\");\n\n    let dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_\";\n    let mut ikm = [0u8; 32];\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n\n    let num_sigs = vec![8, 16, 32, 64, 128];\n    let pks_per_sig = 3;\n\n    for n in num_sigs.iter() {\n        let mut msgs: Vec<Vec<u8>> = vec![vec![]; *n];\n        let mut sigs: Vec<Signature> = Vec::with_capacity(*n);\n        let mut pks: Vec<PublicKey> = Vec::with_capacity(*n);\n        let mut rands: Vec<blst_scalar> = Vec::with_capacity(*n);\n\n        for i in 0..*n {\n            // Create public keys\n            
rng.fill_bytes(&mut ikm);\n            let sks_i: Vec<_> = (0..pks_per_sig)\n                .map(|_| {\n                    ikm[0] += 1;\n                    SecretKey::key_gen(&ikm, &[]).unwrap()\n                })\n                .collect();\n            let pks_i =\n                sks_i.iter().map(|sk| sk.sk_to_pk()).collect::<Vec<_>>();\n            let pks_refs_i: Vec<&PublicKey> =\n                pks_i.iter().map(|pk| pk).collect();\n\n            // Create random message for pks to all sign\n            let msg_len = (rng.next_u64() & 0x3F) + 1;\n            msgs[i] = vec![0u8; msg_len as usize];\n            rng.fill_bytes(&mut msgs[i]);\n\n            // Generate signature for each key pair\n            let sigs_i = sks_i\n                .iter()\n                .map(|sk| sk.sign(&msgs[i], dst, &[]))\n                .collect::<Vec<Signature>>();\n\n            // Aggregate signature\n            let sig_refs_i =\n                sigs_i.iter().map(|s| s).collect::<Vec<&Signature>>();\n            let agg_i = match AggregateSignature::aggregate(&sig_refs_i, false)\n            {\n                Ok(agg_i) => agg_i,\n                Err(err) => panic!(\"aggregate failure: {:?}\", err),\n            };\n            sigs.push(agg_i.to_signature());\n\n            // aggregate public keys and push into vec\n            let agg_pk_i =\n                match AggregatePublicKey::aggregate(&pks_refs_i, false) {\n                    Ok(agg_pk_i) => agg_pk_i,\n                    Err(err) => panic!(\"aggregate failure: {:?}\", err),\n                };\n            pks.push(agg_pk_i.to_public_key());\n\n            // create random values\n            let mut vals = [0u64; 4];\n            vals[0] = rng.next_u64();\n            let mut rand_i = std::mem::MaybeUninit::<blst_scalar>::uninit();\n            unsafe {\n                blst_scalar_from_uint64(rand_i.as_mut_ptr(), vals.as_ptr());\n                rands.push(rand_i.assume_init());\n            }\n      
  }\n\n        let msgs_refs: Vec<&[u8]> = msgs.iter().map(|m| m.as_slice()).collect();\n        let sig_refs = sigs.iter().map(|s| s).collect::<Vec<&Signature>>();\n        let pks_refs: Vec<&PublicKey> = pks.iter().map(|pk| pk).collect();\n\n        let agg_ver = (sig_refs, pks_refs, msgs_refs, dst, rands);\n\n        group.bench_with_input(\n            BenchmarkId::new(\"verify_multi_aggregate\", n),\n            &agg_ver,\n            |b, (s, p, m, d, r)| {\n                b.iter(|| {\n                    let result =\n                        Signature::verify_multiple_aggregate_signatures(\n                            &m, *d, &p, false, &s, false, &r, 64,\n                        );\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n                });\n            },\n        );\n    }\n\n    group.finish();\n}\n\nfn bench_fast_aggregate_verify(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"fast_aggregate_verify\");\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n\n    let msg_len = (rng.next_u64() & 0x3F) + 1;\n    let mut msg = vec![0u8; msg_len as usize];\n    rng.fill_bytes(&mut msg);\n\n    let sizes = vec![8, 16, 32, 64, 128];\n\n    let bds: Vec<_> = (0..sizes[sizes.len() - 1])\n        .map(|_| gen_bench_data_for_msg(&mut rng, &msg))\n        .collect();\n\n    for size in sizes.iter() {\n        let pks_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.pk)\n            .collect::<Vec<&PublicKey>>();\n\n        let sig_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.sig)\n            .collect::<Vec<&Signature>>();\n\n        let agg = match AggregateSignature::aggregate(&sig_refs, false) {\n            Ok(agg) => agg,\n            Err(err) => panic!(\"aggregate failure: {:?}\", err),\n        };\n        let agg_sig = agg.to_signature();\n\n        let agg_pks = match AggregatePublicKey::aggregate(&pks_refs, false) {\n   
         Ok(agg_pks) => agg_pks,\n            Err(err) => panic!(\"aggregate failure: {:?}\", err),\n        };\n        let agg_pk = agg_pks.to_public_key();\n\n        let agg_ver = (agg_sig, pks_refs, &bds[0].msg, &bds[0].dst);\n        let agg_pre_ver = (agg_sig, agg_pk, &bds[0].msg, &bds[0].dst);\n\n        group.bench_with_input(\n            BenchmarkId::new(\"fast_aggregate_verify\", size),\n            &agg_ver,\n            |b, (a, p, m, d)| {\n                b.iter(|| {\n                    let result = a.fast_aggregate_verify(true, &m, &d, &p);\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n                });\n            },\n        );\n\n        group.bench_with_input(\n            BenchmarkId::new(\"fast_aggregate_verify_preagg\", size),\n            &agg_pre_ver,\n            |b, (a, p, m, d)| {\n                b.iter(|| {\n                    let result = a\n                        .fast_aggregate_verify_pre_aggregated(true, &m, &d, &p);\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n                });\n            },\n        );\n    }\n\n    group.finish();\n}\n\nfn bench_aggregate_verify(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"aggregate_verify\");\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n\n    let sizes = vec![8, 16, 32, 64, 128];\n    // [10, 50, 100, 300, 1000, 4000];\n\n    let bds: Vec<_> = (0..sizes[sizes.len() - 1])\n        .map(|_| gen_bench_data(&mut rng))\n        .collect();\n\n    for size in sizes.iter() {\n        let msgs_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| s.msg.as_slice())\n            .collect::<Vec<&[u8]>>();\n\n        let pks_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.pk)\n            .collect::<Vec<&PublicKey>>();\n\n        let sig_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.sig)\n            
.collect::<Vec<&Signature>>();\n\n        let agg = match AggregateSignature::aggregate(&sig_refs, false) {\n            Ok(agg) => agg,\n            Err(err) => panic!(\"aggregate failure: {:?}\", err),\n        };\n        let agg_sig = agg.to_signature();\n        let agg_ver = (agg_sig, pks_refs, msgs_refs, &bds[0].dst);\n\n        group.bench_with_input(\n            BenchmarkId::new(\"aggregate_verify\", size),\n            &agg_ver,\n            |b, (a, p, m, d)| {\n                b.iter(|| {\n                    let result = a.aggregate_verify(true, &m, &d, &p, false);\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n                });\n            },\n        );\n    }\n\n    group.finish();\n}\n\nfn bench_aggregate(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"aggregate\");\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n\n    let sizes: [usize; 6] = [10, 50, 100, 300, 1000, 4000];\n\n    let bds: Vec<_> = (0..4000).map(|_| gen_bench_data(&mut rng)).collect();\n\n    for size in sizes.iter() {\n        let sig_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.sig)\n            .collect::<Vec<&Signature>>();\n\n        group.bench_with_input(\n            BenchmarkId::new(\"aggregate_signature\", size),\n            &sig_refs,\n            |b, s| {\n                b.iter(|| AggregateSignature::aggregate(&s, false));\n            },\n        );\n\n        let pks_refs = bds\n            .iter()\n            .take(*size)\n            .map(|s| &s.pk)\n            .collect::<Vec<&PublicKey>>();\n\n        group.bench_with_input(\n            BenchmarkId::new(\"aggregate_public_key\", size),\n            &pks_refs,\n            |b, p| {\n                b.iter(|| AggregatePublicKey::aggregate(&p, false));\n            },\n        );\n    }\n\n    group.finish();\n}\n\nfn bench_single_message(c: &mut Criterion) {\n    let mut group = 
c.benchmark_group(\"single_message\");\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n    let bd = gen_bench_data(&mut rng);\n\n    group.bench_function(\"sign\", |b| {\n        b.iter(|| bd.sk.sign(&bd.msg, &bd.dst, &[]))\n    });\n\n    group.bench_function(\"verify\", |b| {\n        b.iter(|| bd.sig.verify(true, &bd.msg, &bd.dst, &[], &bd.pk, false))\n    });\n\n    group.finish();\n}\n\nfn bench_serdes(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"serdes\");\n\n    let seed = [0u8; 32];\n    let mut rng = ChaCha20Rng::from_seed(seed);\n    let bd = gen_bench_data(&mut rng);\n\n    let sk = bd.sk;\n    let sk_ser = sk.serialize();\n\n    let pk = bd.pk;\n    let pk_comp = pk.compress();\n    let pk_ser = pk.serialize();\n\n    let sig = bd.sig;\n    let sig_comp = sig.compress();\n    let sig_ser = sig.serialize();\n\n    let mut pk_jac = std::mem::MaybeUninit::<blst_p1>::uninit();\n    let mut sig_jac = std::mem::MaybeUninit::<blst_p2>::uninit();\n\n    let mut p1_comp = [0; 48];\n    let mut p2_comp = [0; 96];\n    let mut p1_ser = [0; 96];\n    let mut p2_ser = [0; 192];\n\n    unsafe {\n        let mut junk = [0u8; 32];\n        rng.fill_bytes(&mut junk);\n        blst_encode_to_g1(\n            pk_jac.as_mut_ptr(),\n            junk.as_ptr(),\n            junk.len(),\n            \"junk\".as_ptr(),\n            4,\n            std::ptr::null(),\n            0,\n        );\n        blst_encode_to_g2(\n            sig_jac.as_mut_ptr(),\n            junk.as_ptr(),\n            junk.len(),\n            \"junk\".as_ptr(),\n            4,\n            std::ptr::null(),\n            0,\n        );\n    }\n\n    group.bench_function(\"secret_key_serialize\", |b| b.iter(|| sk.serialize()));\n\n    group.bench_function(\"secret_key_deserialize\", |b| {\n        b.iter(|| SecretKey::deserialize(&sk_ser));\n    });\n\n    group.bench_function(\"public_key_serialize\", |b| b.iter(|| pk.serialize()));\n\n    
group.bench_function(\"public_key_compress\", |b| b.iter(|| pk.compress()));\n\n    group.bench_function(\"public_key_uncompress\", |b| {\n        b.iter(|| PublicKey::uncompress(&pk_comp))\n    });\n\n    group.bench_function(\"public_key_deserialize\", |b| {\n        b.iter(|| PublicKey::deserialize(&pk_ser));\n    });\n\n    group.bench_function(\"signature_serialize\", |b| b.iter(|| sig.serialize()));\n\n    group.bench_function(\"signature_compress\", |b| b.iter(|| sig.compress()));\n\n    group.bench_function(\"signature_uncompress\", |b| {\n        b.iter(|| Signature::uncompress(&sig_comp))\n    });\n\n    group.bench_function(\"signature_deserialize\", |b| {\n        b.iter(|| Signature::deserialize(&sig_ser))\n    });\n\n    group.bench_function(\"p1_serialize\", |b| {\n        b.iter(|| unsafe {\n            blst_p1_serialize(p1_ser.as_mut_ptr(), pk_jac.as_ptr())\n        })\n    });\n\n    group.bench_function(\"p1_compress\", |b| {\n        b.iter(|| unsafe {\n            blst_p1_compress(p1_comp.as_mut_ptr(), pk_jac.as_ptr())\n        })\n    });\n\n    group.bench_function(\"p2_serialize\", |b| {\n        b.iter(|| unsafe {\n            blst_p2_serialize(p2_ser.as_mut_ptr(), sig_jac.as_ptr())\n        })\n    });\n\n    group.bench_function(\"p2_compress\", |b| {\n        b.iter(|| unsafe {\n            blst_p2_compress(p2_comp.as_mut_ptr(), sig_jac.as_ptr())\n        })\n    });\n\n    group.finish();\n}\n\nfn bench_keys(c: &mut Criterion) {\n    let mut group = c.benchmark_group(\"keys\");\n    let ikm: [u8; 32] = [\n        0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a, 0x08, 0x3a, 0x91, 0x0c,\n        0x8b, 0x72, 0x85, 0x91, 0x46, 0x4c, 0xca, 0x56, 0x60, 0x5b, 0xb0, 0x56,\n        0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c, 0x48, 0x99,\n    ];\n    let sk = SecretKey::key_gen(&ikm, &[]).unwrap();\n    let pk = sk.sk_to_pk();\n    let pk_comp = pk.compress();\n\n    group.bench_function(\"key_gen\", |b| {\n        b.iter(|| SecretKey::key_gen(&ikm, 
&[]))\n    });\n\n    group.bench_function(\"sk_to_pk\", |b| {\n        b.iter(|| sk.sk_to_pk());\n    });\n\n    group.bench_function(\"key_validate\", |b| {\n        b.iter(|| PublicKey::key_validate(&pk_comp));\n    });\n\n    group.finish();\n}\n\ncriterion_group!(\n    benches,\n    bench_verify_multi_aggregate,\n    bench_fast_aggregate_verify,\n    bench_aggregate_verify,\n    bench_aggregate,\n    bench_single_message,\n    bench_serdes,\n    bench_keys\n);\ncriterion_main!(benches);\n"
  },
  {
    "path": "bindings/rust/build.rs",
    "content": "#![allow(unused_imports)]\n\nextern crate cc;\n\nuse std::env;\nuse std::path::{Path, PathBuf};\n\nfn assembly(\n    file_vec: &mut Vec<PathBuf>,\n    base_dir: &Path,\n    _arch: &str,\n    _is_msvc: bool,\n) {\n    #[cfg(target_env = \"msvc\")]\n    if _is_msvc {\n        let sfx = match _arch {\n            \"x86_64\" => \"x86_64\",\n            \"aarch64\" => \"armv8\",\n            _ => \"unknown\",\n        };\n        let files =\n            glob::glob(&format!(\"{}/win64/*-{}.asm\", base_dir.display(), sfx))\n                .expect(\"unable to collect assembly files\");\n        for file in files {\n            file_vec.push(file.unwrap());\n        }\n        return;\n    }\n\n    file_vec.push(base_dir.join(\"assembly.S\"));\n}\n\nfn main() {\n    if env::var(\"CARGO_FEATURE_SERDE_SECRET\").is_ok() {\n        println!(\n            \"cargo:warning=blst: non-production feature serde-secret enabled\"\n        );\n    }\n\n    // account for cross-compilation [by examining environment variables]\n    let target_os = env::var(\"CARGO_CFG_TARGET_OS\").unwrap();\n    let target_env = env::var(\"CARGO_CFG_TARGET_ENV\").unwrap();\n    let target_arch = env::var(\"CARGO_CFG_TARGET_ARCH\").unwrap();\n    let target_family = env::var(\"CARGO_CFG_TARGET_FAMILY\").unwrap_or_default();\n\n    let target_no_std = target_os.eq(\"none\")\n        || (target_os.eq(\"unknown\") && target_arch.eq(\"wasm32\"))\n        || target_os.eq(\"uefi\")\n        || env::var(\"BLST_TEST_NO_STD\").is_ok();\n\n    if !target_no_std {\n        println!(\"cargo:rustc-cfg=feature=\\\"std\\\"\");\n        if target_arch.eq(\"wasm32\") || target_os.eq(\"unknown\") {\n            println!(\"cargo:rustc-cfg=feature=\\\"no-threads\\\"\");\n        }\n    }\n    println!(\"cargo:rerun-if-env-changed=BLST_TEST_NO_STD\");\n\n    /*\n     * Use pre-built libblst.a if there is one. This is primarily\n     * for trouble-shooting purposes. 
Idea is that libblst.a can be\n     * compiled with flags independent from cargo defaults, e.g.\n     * '../../build.sh -O1 ...'.\n     */\n    if Path::new(\"libblst.a\").exists() {\n        println!(\"cargo:rustc-link-search=.\");\n        println!(\"cargo:rustc-link-lib=blst\");\n        println!(\"cargo:rerun-if-changed=libblst.a\");\n        return;\n    }\n\n    let manifest_dir = PathBuf::from(env::var(\"CARGO_MANIFEST_DIR\").unwrap());\n\n    let mut blst_base_dir = manifest_dir.join(\"blst\");\n    if !blst_base_dir.exists() {\n        // Reach out to ../.., which is the root of the blst repo.\n        // Use an absolute path to avoid issues with relative paths\n        // being treated as strings by `cc` and getting concatenated\n        // in ways that reach out of the OUT_DIR.\n        blst_base_dir = manifest_dir\n            .parent()\n            .and_then(|dir| dir.parent())\n            .expect(\"can't access parent of parent of current directory\")\n            .into();\n    }\n    println!(\"Using blst source directory {}\", blst_base_dir.display());\n\n    // Set CC environment variable to choose alternative C compiler.\n    // Optimization level depends on whether or not --release is passed\n    // or implied.\n\n    if target_os.eq(\"uefi\") && env::var(\"CC\").is_err() {\n        match std::process::Command::new(\"clang\")\n            .arg(\"--version\")\n            .output()\n        {\n            Ok(_) => env::set_var(\"CC\", \"clang\"),\n            Err(_) => { /* no clang in sight, just ignore the error */ }\n        }\n    }\n\n    if target_env.eq(\"sgx\") && env::var(\"CC\").is_err() {\n        match std::process::Command::new(\"clang\")\n            .arg(\"--version\")\n            .output()\n        {\n            Ok(out) => {\n                let version = String::from_utf8(out.stdout)\n                    .unwrap_or(\"unintelligible\".to_string());\n                if let Some(x) = version.find(\"clang version \") {\n            
        let x = x + 14;\n                    let y = version[x..].find('.').unwrap_or(0);\n                    if version[x..x + y].parse::<i32>().unwrap_or(0) >= 11 {\n                        env::set_var(\"CC\", \"clang\");\n                    }\n                }\n            }\n            Err(_) => { /* no clang in sight, just ignore the error */ }\n        }\n    }\n\n    if target_env.eq(\"msvc\")\n        && env::var(\"CARGO_CFG_TARGET_POINTER_WIDTH\").unwrap().eq(\"32\")\n        && env::var(\"CC\").is_err()\n    {\n        match std::process::Command::new(\"clang-cl\")\n            .args([\"-m32\", \"--version\"])\n            .output()\n        {\n            Ok(out) => {\n                if String::from_utf8(out.stdout)\n                    .unwrap_or(\"unintelligible\".to_string())\n                    .contains(\"Target: i386-pc-windows-msvc\")\n                {\n                    env::set_var(\"CC\", \"clang-cl\");\n                }\n            }\n            Err(_) => { /* no clang-cl in sight, just ignore the error */ }\n        }\n    }\n\n    let mut cc = cc::Build::new();\n\n    let c_src_dir = blst_base_dir.join(\"src\");\n    println!(\"cargo:rerun-if-changed={}\", c_src_dir.display());\n    let mut file_vec = vec![c_src_dir.join(\"server.c\")];\n\n    if target_arch.eq(\"x86_64\") || target_arch.eq(\"aarch64\") {\n        let asm_dir = blst_base_dir.join(\"build\");\n        println!(\"cargo:rerun-if-changed={}\", asm_dir.display());\n        assembly(\n            &mut file_vec,\n            &asm_dir,\n            &target_arch,\n            cc.get_compiler().is_like_msvc(),\n        );\n    } else {\n        cc.define(\"__BLST_NO_ASM__\", None);\n    }\n    match (cfg!(feature = \"portable\"), cfg!(feature = \"force-adx\")) {\n        (true, false) => {\n            if target_arch.eq(\"x86_64\") && target_env.eq(\"sgx\") {\n                panic!(\"'portable' is not supported on SGX target\");\n            }\n            
println!(\"Compiling in portable mode without ISA extensions\");\n            cc.define(\"__BLST_PORTABLE__\", None);\n        }\n        (false, true) => {\n            if target_arch.eq(\"x86_64\") {\n                println!(\"Enabling ADX support via `force-adx` feature\");\n                cc.define(\"__ADX__\", None);\n            } else {\n                println!(\"`force-adx` is ignored for non-x86_64 targets\");\n            }\n        }\n        (false, false) => {\n            if target_arch.eq(\"x86_64\") {\n                if target_env.eq(\"sgx\") {\n                    println!(\"Enabling ADX for Intel SGX target\");\n                    cc.define(\"__ADX__\", None);\n                } else if env::var(\"CARGO_ENCODED_RUSTFLAGS\")\n                    .unwrap_or_default()\n                    .contains(\"target-cpu=\")\n                {\n                    // If target-cpu is specified on the rustc command line,\n                    // then obey the resulting target-features.\n                    let feat_list = env::var(\"CARGO_CFG_TARGET_FEATURE\")\n                        .unwrap_or_default();\n                    let features: Vec<_> = feat_list.split(',').collect();\n                    if !features.contains(&\"ssse3\") {\n                        println!(\n                            \"Compiling in portable mode without ISA extensions\"\n                        );\n                        cc.define(\"__BLST_PORTABLE__\", None);\n                    } else if features.contains(&\"adx\") {\n                        println!(\n                            \"Enabling ADX because it was set as target-feature\"\n                        );\n                        cc.define(\"__ADX__\", None);\n                    }\n                } else {\n                    #[cfg(target_arch = \"x86_64\")]\n                    if std::is_x86_feature_detected!(\"adx\") {\n                        println!(\n                            \"Enabling ADX because it was 
detected on the host\"\n                        );\n                        cc.define(\"__ADX__\", None);\n                    }\n                }\n            }\n        }\n        (true, true) => panic!(\n            \"Cannot compile with both `portable` and `force-adx` features\"\n        ),\n    }\n    if target_env.eq(\"msvc\") && cc.get_compiler().is_like_msvc() {\n        cc.flag(\"-Zl\");\n    }\n    cc.flag_if_supported(\"-mno-avx\") // avoid costly transitions\n        .flag_if_supported(\"-fno-builtin\")\n        .flag_if_supported(\"-Wno-unused-function\")\n        .flag_if_supported(\"-Wno-unused-command-line-argument\");\n    if target_arch.eq(\"wasm32\") || target_family.is_empty() {\n        cc.flag(\"-ffreestanding\");\n    }\n    if target_arch.eq(\"wasm32\") || target_no_std {\n        cc.define(\"SCRATCH_LIMIT\", \"(45 * 1024)\");\n    }\n    if target_env.eq(\"sgx\") {\n        cc.flag_if_supported(\"-mlvi-hardening\");\n        cc.define(\"__SGX_LVI_HARDENING__\", None);\n        cc.define(\"__BLST_NO_CPUID__\", None);\n        cc.define(\"__ELF__\", None);\n        cc.define(\"SCRATCH_LIMIT\", \"(45 * 1024)\");\n    }\n    if !cfg!(debug_assertions) {\n        cc.opt_level(2);\n    }\n    cc.files(&file_vec).compile(\"blst\");\n\n    // pass some DEP_BLST_* variables to dependents\n    println!(\n        \"cargo:BINDINGS={}\",\n        blst_base_dir.join(\"bindings\").to_string_lossy()\n    );\n    println!(\"cargo:C_SRC={}\", c_src_dir.to_string_lossy());\n}\n"
  },
  {
    "path": "bindings/rust/publish.sh",
    "content": "#!/bin/sh\n\nHERE=`dirname $0`\ncd \"${HERE}\"\n\nif [ ! -d blst ]; then\n    trap '[ -h blst ] && rm -f blst' 0 2\n    ln -s ../.. blst\nfi\n\n# --allow-dirty because the temporary blst symbolic link is not committed\ncargo +stable publish --allow-dirty \"$@\"\n"
  },
  {
    "path": "bindings/rust/rustfmt.toml",
    "content": "max_width = 80\n"
  },
  {
    "path": "bindings/rust/src/bindings.rs",
    "content": "/* automatically generated by rust-bindgen 0.65.1 */\n\n#[repr(u32)]\n#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]\npub enum BLST_ERROR {\n    BLST_SUCCESS = 0,\n    BLST_BAD_ENCODING = 1,\n    BLST_POINT_NOT_ON_CURVE = 2,\n    BLST_POINT_NOT_IN_GROUP = 3,\n    BLST_AGGR_TYPE_MISMATCH = 4,\n    BLST_VERIFY_FAIL = 5,\n    BLST_PK_IS_INFINITY = 6,\n    BLST_BAD_SCALAR = 7,\n}\npub type byte = u8;\npub type limb_t = u64;\n#[repr(C)]\n#[derive(Debug, Default, Clone, PartialEq, Eq, Zeroize)]\n#[zeroize(drop)]\npub struct blst_scalar {\n    pub b: [byte; 32usize],\n}\n#[test]\nfn bindgen_test_layout_blst_scalar() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_scalar> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_scalar>(),\n        32usize,\n        concat!(\"Size of: \", stringify!(blst_scalar))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_scalar>(),\n        1usize,\n        concat!(\"Alignment of \", stringify!(blst_scalar))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).b) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_scalar),\n            \"::\",\n            stringify!(b)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]\npub struct blst_fr {\n    pub l: [limb_t; 4usize],\n}\n#[test]\nfn bindgen_test_layout_blst_fr() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_fr> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_fr>(),\n        32usize,\n        concat!(\"Size of: \", stringify!(blst_fr))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_fr>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_fr))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).l) as usize - ptr as 
usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_fr),\n            \"::\",\n            stringify!(l)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]\npub struct blst_fp {\n    pub l: [limb_t; 6usize],\n}\n#[test]\nfn bindgen_test_layout_blst_fp() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_fp> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_fp>(),\n        48usize,\n        concat!(\"Size of: \", stringify!(blst_fp))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_fp>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_fp))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).l) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_fp),\n            \"::\",\n            stringify!(l)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]\npub struct blst_fp2 {\n    pub fp: [blst_fp; 2usize],\n}\n#[test]\nfn bindgen_test_layout_blst_fp2() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_fp2> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_fp2>(),\n        96usize,\n        concat!(\"Size of: \", stringify!(blst_fp2))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_fp2>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_fp2))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).fp) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_fp2),\n            \"::\",\n            stringify!(fp)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)]\npub struct blst_fp6 {\n    pub fp2: [blst_fp2; 
3usize],\n}\n#[test]\nfn bindgen_test_layout_blst_fp6() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_fp6> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_fp6>(),\n        288usize,\n        concat!(\"Size of: \", stringify!(blst_fp6))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_fp6>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_fp6))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).fp2) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_fp6),\n            \"::\",\n            stringify!(fp2)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Copy, Clone, Eq)]\npub struct blst_fp12 {\n    pub fp6: [blst_fp6; 2usize],\n}\n#[test]\nfn bindgen_test_layout_blst_fp12() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_fp12> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_fp12>(),\n        576usize,\n        concat!(\"Size of: \", stringify!(blst_fp12))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_fp12>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_fp12))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).fp6) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_fp12),\n            \"::\",\n            stringify!(fp6)\n        )\n    );\n}\nextern \"C\" {\n    pub fn blst_scalar_from_uint32(out: *mut blst_scalar, a: *const u32);\n}\nextern \"C\" {\n    pub fn blst_uint32_from_scalar(out: *mut u32, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_scalar_from_uint64(out: *mut blst_scalar, a: *const u64);\n}\nextern \"C\" {\n    pub fn blst_uint64_from_scalar(out: *mut u64, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn 
blst_scalar_from_bendian(out: *mut blst_scalar, a: *const byte);\n}\nextern \"C\" {\n    pub fn blst_bendian_from_scalar(out: *mut byte, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_scalar_from_lendian(out: *mut blst_scalar, a: *const byte);\n}\nextern \"C\" {\n    pub fn blst_lendian_from_scalar(out: *mut byte, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_scalar_fr_check(a: *const blst_scalar) -> bool;\n}\nextern \"C\" {\n    pub fn blst_sk_check(a: *const blst_scalar) -> bool;\n}\nextern \"C\" {\n    pub fn blst_sk_add_n_check(\n        out: *mut blst_scalar,\n        a: *const blst_scalar,\n        b: *const blst_scalar,\n    ) -> bool;\n}\nextern \"C\" {\n    pub fn blst_sk_sub_n_check(\n        out: *mut blst_scalar,\n        a: *const blst_scalar,\n        b: *const blst_scalar,\n    ) -> bool;\n}\nextern \"C\" {\n    pub fn blst_sk_mul_n_check(\n        out: *mut blst_scalar,\n        a: *const blst_scalar,\n        b: *const blst_scalar,\n    ) -> bool;\n}\nextern \"C\" {\n    pub fn blst_sk_inverse(out: *mut blst_scalar, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_scalar_from_le_bytes(out: *mut blst_scalar, in_: *const byte, len: usize) -> bool;\n}\nextern \"C\" {\n    pub fn blst_scalar_from_be_bytes(out: *mut blst_scalar, in_: *const byte, len: usize) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fr_add(ret: *mut blst_fr, a: *const blst_fr, b: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_sub(ret: *mut blst_fr, a: *const blst_fr, b: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_mul_by_3(ret: *mut blst_fr, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_lshift(ret: *mut blst_fr, a: *const blst_fr, count: usize);\n}\nextern \"C\" {\n    pub fn blst_fr_rshift(ret: *mut blst_fr, a: *const blst_fr, count: usize);\n}\nextern \"C\" {\n    pub fn blst_fr_mul(ret: *mut blst_fr, a: *const blst_fr, b: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_sqr(ret: *mut blst_fr, a: *const 
blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_cneg(ret: *mut blst_fr, a: *const blst_fr, flag: bool);\n}\nextern \"C\" {\n    pub fn blst_fr_eucl_inverse(ret: *mut blst_fr, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_inverse(ret: *mut blst_fr, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_from_uint64(ret: *mut blst_fr, a: *const u64);\n}\nextern \"C\" {\n    pub fn blst_uint64_from_fr(ret: *mut u64, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_from_scalar(ret: *mut blst_fr, a: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_scalar_from_fr(ret: *mut blst_scalar, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fp_add(ret: *mut blst_fp, a: *const blst_fp, b: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_sub(ret: *mut blst_fp, a: *const blst_fp, b: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_mul_by_3(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_mul_by_8(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_lshift(ret: *mut blst_fp, a: *const blst_fp, count: usize);\n}\nextern \"C\" {\n    pub fn blst_fp_mul(ret: *mut blst_fp, a: *const blst_fp, b: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_sqr(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_cneg(ret: *mut blst_fp, a: *const blst_fp, flag: bool);\n}\nextern \"C\" {\n    pub fn blst_fp_eucl_inverse(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_inverse(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_sqrt(ret: *mut blst_fp, a: *const blst_fp) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp_from_uint32(ret: *mut blst_fp, a: *const u32);\n}\nextern \"C\" {\n    pub fn blst_uint32_from_fp(ret: *mut u32, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_from_uint64(ret: *mut blst_fp, a: *const u64);\n}\nextern \"C\" {\n    pub fn blst_uint64_from_fp(ret: *mut u64, a: *const 
blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_from_bendian(ret: *mut blst_fp, a: *const byte);\n}\nextern \"C\" {\n    pub fn blst_bendian_from_fp(ret: *mut byte, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_from_lendian(ret: *mut blst_fp, a: *const byte);\n}\nextern \"C\" {\n    pub fn blst_lendian_from_fp(ret: *mut byte, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp2_add(ret: *mut blst_fp2, a: *const blst_fp2, b: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_sub(ret: *mut blst_fp2, a: *const blst_fp2, b: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_mul_by_3(ret: *mut blst_fp2, a: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_mul_by_8(ret: *mut blst_fp2, a: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_lshift(ret: *mut blst_fp2, a: *const blst_fp2, count: usize);\n}\nextern \"C\" {\n    pub fn blst_fp2_mul(ret: *mut blst_fp2, a: *const blst_fp2, b: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_sqr(ret: *mut blst_fp2, a: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_cneg(ret: *mut blst_fp2, a: *const blst_fp2, flag: bool);\n}\nextern \"C\" {\n    pub fn blst_fp2_eucl_inverse(ret: *mut blst_fp2, a: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_inverse(ret: *mut blst_fp2, a: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_fp2_sqrt(ret: *mut blst_fp2, a: *const blst_fp2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp12_sqr(ret: *mut blst_fp12, a: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_fp12_cyclotomic_sqr(ret: *mut blst_fp12, a: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_fp12_mul(ret: *mut blst_fp12, a: *const blst_fp12, b: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_fp12_mul_by_xy00z0(\n        ret: *mut blst_fp12,\n        a: *const blst_fp12,\n        xy00z0: *const blst_fp6,\n    );\n}\nextern \"C\" {\n    pub fn blst_fp12_conjugate(a: *mut blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_fp12_inverse(ret: 
*mut blst_fp12, a: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_fp12_frobenius_map(ret: *mut blst_fp12, a: *const blst_fp12, n: usize);\n}\nextern \"C\" {\n    pub fn blst_fp12_is_equal(a: *const blst_fp12, b: *const blst_fp12) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp12_is_one(a: *const blst_fp12) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp12_in_group(a: *const blst_fp12) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp12_one() -> *const blst_fp12;\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, Eq)]\npub struct blst_p1 {\n    pub x: blst_fp,\n    pub y: blst_fp,\n    pub z: blst_fp,\n}\n#[test]\nfn bindgen_test_layout_blst_p1() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_p1> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_p1>(),\n        144usize,\n        concat!(\"Size of: \", stringify!(blst_p1))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_p1>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_p1))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).x) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p1),\n            \"::\",\n            stringify!(x)\n        )\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).y) as usize - ptr as usize },\n        48usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p1),\n            \"::\",\n            stringify!(y)\n        )\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).z) as usize - ptr as usize },\n        96usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p1),\n            \"::\",\n            stringify!(z)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, Eq)]\npub struct blst_p1_affine {\n    pub x: blst_fp,\n    pub y: 
blst_fp,\n}\n#[test]\nfn bindgen_test_layout_blst_p1_affine() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_p1_affine> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_p1_affine>(),\n        96usize,\n        concat!(\"Size of: \", stringify!(blst_p1_affine))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_p1_affine>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_p1_affine))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).x) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p1_affine),\n            \"::\",\n            stringify!(x)\n        )\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).y) as usize - ptr as usize },\n        48usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p1_affine),\n            \"::\",\n            stringify!(y)\n        )\n    );\n}\nextern \"C\" {\n    pub fn blst_p1_add(out: *mut blst_p1, a: *const blst_p1, b: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_add_or_double(out: *mut blst_p1, a: *const blst_p1, b: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_add_affine(out: *mut blst_p1, a: *const blst_p1, b: *const blst_p1_affine);\n}\nextern \"C\" {\n    pub fn blst_p1_add_or_double_affine(\n        out: *mut blst_p1,\n        a: *const blst_p1,\n        b: *const blst_p1_affine,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1_double(out: *mut blst_p1, a: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_mult(out: *mut blst_p1, p: *const blst_p1, scalar: *const byte, nbits: usize);\n}\nextern \"C\" {\n    pub fn blst_p1_cneg(p: *mut blst_p1, cbit: bool);\n}\nextern \"C\" {\n    pub fn blst_p1_to_affine(out: *mut blst_p1_affine, in_: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_from_affine(out: *mut blst_p1, in_: *const 
blst_p1_affine);\n}\nextern \"C\" {\n    pub fn blst_p1_on_curve(p: *const blst_p1) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_in_g1(p: *const blst_p1) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_is_equal(a: *const blst_p1, b: *const blst_p1) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_is_inf(a: *const blst_p1) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_generator() -> *const blst_p1;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_on_curve(p: *const blst_p1_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_in_g1(p: *const blst_p1_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_is_equal(a: *const blst_p1_affine, b: *const blst_p1_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_is_inf(a: *const blst_p1_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_generator() -> *const blst_p1_affine;\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, Eq)]\npub struct blst_p2 {\n    pub x: blst_fp2,\n    pub y: blst_fp2,\n    pub z: blst_fp2,\n}\n#[test]\nfn bindgen_test_layout_blst_p2() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_p2> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_p2>(),\n        288usize,\n        concat!(\"Size of: \", stringify!(blst_p2))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_p2>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_p2))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).x) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p2),\n            \"::\",\n            stringify!(x)\n        )\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).y) as usize - ptr as usize },\n        96usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p2),\n            \"::\",\n            stringify!(y)\n        )\n 
   );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).z) as usize - ptr as usize },\n        192usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p2),\n            \"::\",\n            stringify!(z)\n        )\n    );\n}\n#[repr(C)]\n#[derive(Debug, Default, Copy, Clone, Eq)]\npub struct blst_p2_affine {\n    pub x: blst_fp2,\n    pub y: blst_fp2,\n}\n#[test]\nfn bindgen_test_layout_blst_p2_affine() {\n    const UNINIT: ::core::mem::MaybeUninit<blst_p2_affine> = ::core::mem::MaybeUninit::uninit();\n    let ptr = UNINIT.as_ptr();\n    assert_eq!(\n        ::core::mem::size_of::<blst_p2_affine>(),\n        192usize,\n        concat!(\"Size of: \", stringify!(blst_p2_affine))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_p2_affine>(),\n        8usize,\n        concat!(\"Alignment of \", stringify!(blst_p2_affine))\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).x) as usize - ptr as usize },\n        0usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p2_affine),\n            \"::\",\n            stringify!(x)\n        )\n    );\n    assert_eq!(\n        unsafe { ::core::ptr::addr_of!((*ptr).y) as usize - ptr as usize },\n        96usize,\n        concat!(\n            \"Offset of field: \",\n            stringify!(blst_p2_affine),\n            \"::\",\n            stringify!(y)\n        )\n    );\n}\nextern \"C\" {\n    pub fn blst_p2_add(out: *mut blst_p2, a: *const blst_p2, b: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_add_or_double(out: *mut blst_p2, a: *const blst_p2, b: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_add_affine(out: *mut blst_p2, a: *const blst_p2, b: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_p2_add_or_double_affine(\n        out: *mut blst_p2,\n        a: *const blst_p2,\n        b: *const blst_p2_affine,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2_double(out: *mut 
blst_p2, a: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_mult(out: *mut blst_p2, p: *const blst_p2, scalar: *const byte, nbits: usize);\n}\nextern \"C\" {\n    pub fn blst_p2_cneg(p: *mut blst_p2, cbit: bool);\n}\nextern \"C\" {\n    pub fn blst_p2_to_affine(out: *mut blst_p2_affine, in_: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_from_affine(out: *mut blst_p2, in_: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_p2_on_curve(p: *const blst_p2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_in_g2(p: *const blst_p2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_is_equal(a: *const blst_p2, b: *const blst_p2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_is_inf(a: *const blst_p2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_generator() -> *const blst_p2;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_on_curve(p: *const blst_p2_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_in_g2(p: *const blst_p2_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_is_equal(a: *const blst_p2_affine, b: *const blst_p2_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_is_inf(a: *const blst_p2_affine) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_generator() -> *const blst_p2_affine;\n}\nextern \"C\" {\n    pub fn blst_p1s_to_affine(\n        dst: *mut blst_p1_affine,\n        points: *const *const blst_p1,\n        npoints: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1s_add(ret: *mut blst_p1, points: *const *const blst_p1_affine, npoints: usize);\n}\nextern \"C\" {\n    pub fn blst_p1s_mult_wbits_precompute_sizeof(wbits: usize, npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn blst_p1s_mult_wbits_precompute(\n        table: *mut blst_p1_affine,\n        wbits: usize,\n        points: *const *const blst_p1_affine,\n        npoints: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1s_mult_wbits_scratch_sizeof(npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn 
blst_p1s_mult_wbits(\n        ret: *mut blst_p1,\n        table: *const blst_p1_affine,\n        wbits: usize,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1s_mult_pippenger_scratch_sizeof(npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn blst_p1s_mult_pippenger(\n        ret: *mut blst_p1,\n        points: *const *const blst_p1_affine,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1s_tile_pippenger(\n        ret: *mut blst_p1,\n        points: *const *const blst_p1_affine,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n        bit0: usize,\n        window: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2s_to_affine(\n        dst: *mut blst_p2_affine,\n        points: *const *const blst_p2,\n        npoints: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2s_add(ret: *mut blst_p2, points: *const *const blst_p2_affine, npoints: usize);\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_wbits_precompute_sizeof(wbits: usize, npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_wbits_precompute(\n        table: *mut blst_p2_affine,\n        wbits: usize,\n        points: *const *const blst_p2_affine,\n        npoints: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_wbits_scratch_sizeof(npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_wbits(\n        ret: *mut blst_p2,\n        table: *const blst_p2_affine,\n        wbits: usize,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_pippenger_scratch_sizeof(npoints: usize) -> usize;\n}\nextern \"C\" {\n    pub fn blst_p2s_mult_pippenger(\n        
ret: *mut blst_p2,\n        points: *const *const blst_p2_affine,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2s_tile_pippenger(\n        ret: *mut blst_p2,\n        points: *const *const blst_p2_affine,\n        npoints: usize,\n        scalars: *const *const byte,\n        nbits: usize,\n        scratch: *mut limb_t,\n        bit0: usize,\n        window: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_map_to_g1(out: *mut blst_p1, u: *const blst_fp, v: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_map_to_g2(out: *mut blst_p2, u: *const blst_fp2, v: *const blst_fp2);\n}\nextern \"C\" {\n    pub fn blst_encode_to_g1(\n        out: *mut blst_p1,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_hash_to_g1(\n        out: *mut blst_p1,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_encode_to_g2(\n        out: *mut blst_p2,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_hash_to_g2(\n        out: *mut blst_p2,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1_serialize(out: *mut byte, in_: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_compress(out: *mut byte, in_: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p1_affine_serialize(out: *mut byte, in_: *const blst_p1_affine);\n}\nextern \"C\" {\n    pub fn 
blst_p1_affine_compress(out: *mut byte, in_: *const blst_p1_affine);\n}\nextern \"C\" {\n    pub fn blst_p1_uncompress(out: *mut blst_p1_affine, in_: *const byte) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_p1_deserialize(out: *mut blst_p1_affine, in_: *const byte) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_p2_serialize(out: *mut byte, in_: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_compress(out: *mut byte, in_: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_p2_affine_serialize(out: *mut byte, in_: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_p2_affine_compress(out: *mut byte, in_: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_p2_uncompress(out: *mut blst_p2_affine, in_: *const byte) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_p2_deserialize(out: *mut blst_p2_affine, in_: *const byte) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_keygen(\n        out_SK: *mut blst_scalar,\n        IKM: *const byte,\n        IKM_len: usize,\n        info: *const byte,\n        info_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_sk_to_pk_in_g1(out_pk: *mut blst_p1, SK: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_sign_pk_in_g1(out_sig: *mut blst_p2, hash: *const blst_p2, SK: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_sk_to_pk_in_g2(out_pk: *mut blst_p2, SK: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_sign_pk_in_g2(out_sig: *mut blst_p1, hash: *const blst_p1, SK: *const blst_scalar);\n}\nextern \"C\" {\n    pub fn blst_miller_loop(\n        ret: *mut blst_fp12,\n        Q: *const blst_p2_affine,\n        P: *const blst_p1_affine,\n    );\n}\nextern \"C\" {\n    pub fn blst_miller_loop_n(\n        ret: *mut blst_fp12,\n        Qs: *const *const blst_p2_affine,\n        Ps: *const *const blst_p1_affine,\n        n: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_final_exp(ret: *mut blst_fp12, f: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn 
blst_precompute_lines(Qlines: *mut blst_fp6, Q: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_miller_loop_lines(\n        ret: *mut blst_fp12,\n        Qlines: *const blst_fp6,\n        P: *const blst_p1_affine,\n    );\n}\nextern \"C\" {\n    pub fn blst_fp12_finalverify(gt1: *const blst_fp12, gt2: *const blst_fp12) -> bool;\n}\n#[repr(C)]\n#[repr(align(1))]\n#[derive(Debug, Default)]\npub struct blst_pairing {\n    pub _bindgen_opaque_blob: [u8; 0usize],\n}\n#[test]\nfn bindgen_test_layout_blst_pairing() {\n    assert_eq!(\n        ::core::mem::size_of::<blst_pairing>(),\n        0usize,\n        concat!(\"Size of: \", stringify!(blst_pairing))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_pairing>(),\n        1usize,\n        concat!(\"Alignment of \", stringify!(blst_pairing))\n    );\n}\nextern \"C\" {\n    pub fn blst_pairing_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_pairing_init(\n        new_ctx: *mut blst_pairing,\n        hash_or_encode: bool,\n        DST: *const byte,\n        DST_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_pairing_get_dst(ctx: *const blst_pairing) -> *const byte;\n}\nextern \"C\" {\n    pub fn blst_pairing_commit(ctx: *mut blst_pairing);\n}\nextern \"C\" {\n    pub fn blst_pairing_aggregate_pk_in_g2(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p2_affine,\n        signature: *const blst_p1_affine,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_chk_n_aggr_pk_in_g2(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p2_affine,\n        pk_grpchk: bool,\n        signature: *const blst_p1_affine,\n        sig_grpchk: bool,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_mul_n_aggregate_pk_in_g2(\n        ctx: *mut 
blst_pairing,\n        PK: *const blst_p2_affine,\n        sig: *const blst_p1_affine,\n        scalar: *const byte,\n        nbits: usize,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_chk_n_mul_n_aggr_pk_in_g2(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p2_affine,\n        pk_grpchk: bool,\n        sig: *const blst_p1_affine,\n        sig_grpchk: bool,\n        scalar: *const byte,\n        nbits: usize,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_aggregate_pk_in_g1(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p1_affine,\n        signature: *const blst_p2_affine,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_chk_n_aggr_pk_in_g1(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p1_affine,\n        pk_grpchk: bool,\n        signature: *const blst_p2_affine,\n        sig_grpchk: bool,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_mul_n_aggregate_pk_in_g1(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p1_affine,\n        sig: *const blst_p2_affine,\n        scalar: *const byte,\n        nbits: usize,\n        msg: *const byte,\n        msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_chk_n_mul_n_aggr_pk_in_g1(\n        ctx: *mut blst_pairing,\n        PK: *const blst_p1_affine,\n        pk_grpchk: bool,\n        sig: *const blst_p2_affine,\n        sig_grpchk: bool,\n        scalar: *const byte,\n        nbits: usize,\n        msg: *const byte,\n 
       msg_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_merge(ctx: *mut blst_pairing, ctx1: *const blst_pairing) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_pairing_finalverify(ctx: *const blst_pairing, gtsig: *const blst_fp12) -> bool;\n}\nextern \"C\" {\n    pub fn blst_aggregate_in_g1(\n        out: *mut blst_p1,\n        in_: *const blst_p1,\n        zwire: *const byte,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_aggregate_in_g2(\n        out: *mut blst_p2,\n        in_: *const blst_p2,\n        zwire: *const byte,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_aggregated_in_g1(out: *mut blst_fp12, signature: *const blst_p1_affine);\n}\nextern \"C\" {\n    pub fn blst_aggregated_in_g2(out: *mut blst_fp12, signature: *const blst_p2_affine);\n}\nextern \"C\" {\n    pub fn blst_core_verify_pk_in_g1(\n        pk: *const blst_p1_affine,\n        signature: *const blst_p2_affine,\n        hash_or_encode: bool,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub fn blst_core_verify_pk_in_g2(\n        pk: *const blst_p2_affine,\n        signature: *const blst_p1_affine,\n        hash_or_encode: bool,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n        aug: *const byte,\n        aug_len: usize,\n    ) -> BLST_ERROR;\n}\nextern \"C\" {\n    pub static BLS12_381_G1: blst_p1_affine;\n}\nextern \"C\" {\n    pub static BLS12_381_NEG_G1: blst_p1_affine;\n}\nextern \"C\" {\n    pub static BLS12_381_G2: blst_p2_affine;\n}\nextern \"C\" {\n    pub static BLS12_381_NEG_G2: blst_p2_affine;\n}\nextern \"C\" {\n    pub fn blst_fr_ct_bfly(x0: *mut blst_fr, x1: *mut blst_fr, twiddle: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_gs_bfly(x0: *mut 
blst_fr, x1: *mut blst_fr, twiddle: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_to(ret: *mut blst_fr, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fr_from(ret: *mut blst_fr, a: *const blst_fr);\n}\nextern \"C\" {\n    pub fn blst_fp_to(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_from(ret: *mut blst_fp, a: *const blst_fp);\n}\nextern \"C\" {\n    pub fn blst_fp_is_square(a: *const blst_fp) -> bool;\n}\nextern \"C\" {\n    pub fn blst_fp2_is_square(a: *const blst_fp2) -> bool;\n}\nextern \"C\" {\n    pub fn blst_p1_from_jacobian(out: *mut blst_p1, in_: *const blst_p1);\n}\nextern \"C\" {\n    pub fn blst_p2_from_jacobian(out: *mut blst_p2, in_: *const blst_p2);\n}\nextern \"C\" {\n    pub fn blst_sk_to_pk2_in_g1(\n        out: *mut byte,\n        out_pk: *mut blst_p1_affine,\n        SK: *const blst_scalar,\n    );\n}\nextern \"C\" {\n    pub fn blst_sign_pk2_in_g1(\n        out: *mut byte,\n        out_sig: *mut blst_p2_affine,\n        hash: *const blst_p2,\n        SK: *const blst_scalar,\n    );\n}\nextern \"C\" {\n    pub fn blst_sk_to_pk2_in_g2(\n        out: *mut byte,\n        out_pk: *mut blst_p2_affine,\n        SK: *const blst_scalar,\n    );\n}\nextern \"C\" {\n    pub fn blst_sign_pk2_in_g2(\n        out: *mut byte,\n        out_sig: *mut blst_p1_affine,\n        hash: *const blst_p1,\n        SK: *const blst_scalar,\n    );\n}\n#[repr(C)]\n#[repr(align(1))]\n#[derive(Debug, Default)]\npub struct blst_uniq {\n    pub _bindgen_opaque_blob: [u8; 0usize],\n}\n#[test]\nfn bindgen_test_layout_blst_uniq() {\n    assert_eq!(\n        ::core::mem::size_of::<blst_uniq>(),\n        0usize,\n        concat!(\"Size of: \", stringify!(blst_uniq))\n    );\n    assert_eq!(\n        ::core::mem::align_of::<blst_uniq>(),\n        1usize,\n        concat!(\"Alignment of \", stringify!(blst_uniq))\n    );\n}\nextern \"C\" {\n    pub fn blst_uniq_sizeof(n_nodes: usize) -> usize;\n}\nextern \"C\" {\n    pub fn 
blst_uniq_init(tree: *mut blst_uniq);\n}\nextern \"C\" {\n    pub fn blst_uniq_test(tree: *mut blst_uniq, msg: *const byte, len: usize) -> bool;\n}\nextern \"C\" {\n    pub fn blst_expand_message_xmd(\n        out: *mut byte,\n        out_len: usize,\n        msg: *const byte,\n        msg_len: usize,\n        DST: *const byte,\n        DST_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p1_unchecked_mult(\n        out: *mut blst_p1,\n        p: *const blst_p1,\n        scalar: *const byte,\n        nbits: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_p2_unchecked_mult(\n        out: *mut blst_p2,\n        p: *const blst_p2,\n        scalar: *const byte,\n        nbits: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_pairing_raw_aggregate(\n        ctx: *mut blst_pairing,\n        q: *const blst_p2_affine,\n        p: *const blst_p1_affine,\n    );\n}\nextern \"C\" {\n    pub fn blst_pairing_as_fp12(ctx: *mut blst_pairing) -> *mut blst_fp12;\n}\nextern \"C\" {\n    pub fn blst_bendian_from_fp12(out: *mut byte, a: *const blst_fp12);\n}\nextern \"C\" {\n    pub fn blst_keygen_v3(\n        out_SK: *mut blst_scalar,\n        IKM: *const byte,\n        IKM_len: usize,\n        info: *const byte,\n        info_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_keygen_v4_5(\n        out_SK: *mut blst_scalar,\n        IKM: *const byte,\n        IKM_len: usize,\n        salt: *const byte,\n        salt_len: usize,\n        info: *const byte,\n        info_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_keygen_v5(\n        out_SK: *mut blst_scalar,\n        IKM: *const byte,\n        IKM_len: usize,\n        salt: *const byte,\n        salt_len: usize,\n        info: *const byte,\n        info_len: usize,\n    );\n}\nextern \"C\" {\n    pub fn blst_derive_master_eip2333(out_SK: *mut blst_scalar, IKM: *const byte, IKM_len: usize);\n}\nextern \"C\" {\n    pub fn blst_derive_child_eip2333(\n        out_SK: *mut blst_scalar,\n        SK: *const 
blst_scalar,\n        child_index: u32,\n    );\n}\nextern \"C\" {\n    pub fn blst_scalar_from_hexascii(out: *mut blst_scalar, hex: *const byte);\n}\nextern \"C\" {\n    pub fn blst_fr_from_hexascii(ret: *mut blst_fr, hex: *const byte);\n}\nextern \"C\" {\n    pub fn blst_fp_from_hexascii(ret: *mut blst_fp, hex: *const byte);\n}\nextern \"C\" {\n    pub fn blst_p1_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_p1_affine_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_p2_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_p2_affine_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_fp12_sizeof() -> usize;\n}\nextern \"C\" {\n    pub fn blst_fp_from_le_bytes(ret: *mut blst_fp, in_: *const byte, len: usize);\n}\nextern \"C\" {\n    pub fn blst_fp_from_be_bytes(ret: *mut blst_fp, in_: *const byte, len: usize);\n}\nextern \"C\" {\n    pub fn blst_sha256(out: *mut byte, msg: *const byte, msg_len: usize);\n}\n#[test]\nfn bindgen_test_normal_types() {\n    // from \"Rust for Rustaceans\" by Jon Gjengset\n    fn is_normal<T: Sized + Send + Sync + Unpin>() {}\n    is_normal::<BLST_ERROR>();\n    is_normal::<blst_scalar>();\n    is_normal::<blst_fr>();\n    is_normal::<blst_fp>();\n    is_normal::<blst_fp2>();\n    is_normal::<blst_fp6>();\n    is_normal::<blst_fp12>();\n    is_normal::<blst_p1>();\n    is_normal::<blst_p1_affine>();\n    is_normal::<blst_p2>();\n    is_normal::<blst_p2_affine>();\n    is_normal::<blst_pairing>();\n    is_normal::<blst_uniq>();\n}\n"
  },
  {
    "path": "bindings/rust/src/lib.rs",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\n#![cfg_attr(not(feature = \"std\"), no_std)]\n#![allow(non_upper_case_globals)]\n#![allow(non_camel_case_types)]\n#![allow(non_snake_case)]\n#![allow(unexpected_cfgs)]\n\nextern crate alloc;\n\nuse alloc::boxed::Box;\nuse alloc::vec;\nuse alloc::vec::Vec;\nuse core::any::Any;\nuse core::mem::{transmute, MaybeUninit};\nuse core::ptr;\nuse zeroize::Zeroize;\n\n#[cfg(feature = \"std\")]\nuse std::sync::{atomic::*, mpsc::sync_channel, Arc};\n\n#[cfg(feature = \"serde\")]\nuse serde::{Deserialize, Deserializer, Serialize, Serializer};\n\n#[cfg(feature = \"std\")]\ntrait ThreadPoolExt {\n    fn joined_execute<'any, F>(&self, job: F)\n    where\n        F: FnOnce() + Send + 'any;\n}\n\n#[cfg(all(not(feature = \"no-threads\"), feature = \"std\"))]\nmod mt {\n    use super::*;\n    use std::sync::{Mutex, Once};\n    use threadpool::ThreadPool;\n\n    pub fn da_pool() -> ThreadPool {\n        static INIT: Once = Once::new();\n        static mut POOL: *const Mutex<ThreadPool> = ptr::null();\n\n        INIT.call_once(|| {\n            let pool = Mutex::new(ThreadPool::default());\n            unsafe { POOL = transmute::<Box<_>, *const _>(Box::new(pool)) };\n        });\n        unsafe { (*POOL).lock().unwrap().clone() }\n    }\n\n    type Thunk<'any> = Box<dyn FnOnce() + Send + 'any>;\n\n    impl ThreadPoolExt for ThreadPool {\n        fn joined_execute<'scope, F>(&self, job: F)\n        where\n            F: FnOnce() + Send + 'scope,\n        {\n            // Bypass 'lifetime limitations by brute force. 
It works,\n            // because we explicitly join the threads...\n            self.execute(unsafe {\n                transmute::<Thunk<'scope>, Thunk<'static>>(Box::new(job))\n            })\n        }\n    }\n}\n\n#[cfg(all(feature = \"no-threads\", feature = \"std\"))]\nmod mt {\n    use super::*;\n\n    pub struct EmptyPool {}\n\n    pub fn da_pool() -> EmptyPool {\n        EmptyPool {}\n    }\n\n    impl EmptyPool {\n        pub fn max_count(&self) -> usize {\n            1\n        }\n    }\n\n    impl ThreadPoolExt for EmptyPool {\n        fn joined_execute<'scope, F>(&self, job: F)\n        where\n            F: FnOnce() + Send + 'scope,\n        {\n            job()\n        }\n    }\n}\n\ninclude!(\"bindings.rs\");\n\nimpl PartialEq for blst_p1 {\n    fn eq(&self, other: &Self) -> bool {\n        unsafe { blst_p1_is_equal(self, other) }\n    }\n}\n\nimpl PartialEq for blst_p1_affine {\n    fn eq(&self, other: &Self) -> bool {\n        unsafe { blst_p1_affine_is_equal(self, other) }\n    }\n}\n\nimpl PartialEq for blst_p2 {\n    fn eq(&self, other: &Self) -> bool {\n        unsafe { blst_p2_is_equal(self, other) }\n    }\n}\n\nimpl PartialEq for blst_p2_affine {\n    fn eq(&self, other: &Self) -> bool {\n        unsafe { blst_p2_affine_is_equal(self, other) }\n    }\n}\n\nimpl Default for blst_fp12 {\n    fn default() -> Self {\n        unsafe { *blst_fp12_one() }\n    }\n}\n\nimpl PartialEq for blst_fp12 {\n    fn eq(&self, other: &Self) -> bool {\n        unsafe { blst_fp12_is_equal(self, other) }\n    }\n}\n\nimpl core::ops::Mul for blst_fp12 {\n    type Output = Self;\n\n    fn mul(self, other: Self) -> Self {\n        let mut out = MaybeUninit::<blst_fp12>::uninit();\n        unsafe {\n            blst_fp12_mul(out.as_mut_ptr(), &self, &other);\n            out.assume_init()\n        }\n    }\n}\n\nimpl core::ops::MulAssign for blst_fp12 {\n    fn mul_assign(&mut self, other: Self) {\n        unsafe { blst_fp12_mul(self, self, &other) }\n    
}\n}\n\nimpl blst_fp12 {\n    pub fn miller_loop(q: &blst_p2_affine, p: &blst_p1_affine) -> Self {\n        let mut out = MaybeUninit::<blst_fp12>::uninit();\n        unsafe {\n            blst_miller_loop(out.as_mut_ptr(), q, p);\n            out.assume_init()\n        }\n    }\n\n    #[cfg(not(feature = \"std\"))]\n    pub fn miller_loop_n(q: &[blst_p2_affine], p: &[blst_p1_affine]) -> Self {\n        let n_elems = q.len();\n        if n_elems != p.len() || n_elems == 0 {\n            panic!(\"inputs' lengths mismatch\");\n        }\n        let qs: [*const _; 2] = [&q[0], ptr::null()];\n        let ps: [*const _; 2] = [&p[0], ptr::null()];\n        let mut out = MaybeUninit::<blst_fp12>::uninit();\n        unsafe {\n            blst_miller_loop_n(out.as_mut_ptr(), &qs[0], &ps[0], n_elems);\n            out.assume_init()\n        }\n    }\n\n    #[cfg(feature = \"std\")]\n    pub fn miller_loop_n(q: &[blst_p2_affine], p: &[blst_p1_affine]) -> Self {\n        let n_elems = q.len();\n        if n_elems != p.len() || n_elems == 0 {\n            panic!(\"inputs' lengths mismatch\");\n        }\n\n        let pool = mt::da_pool();\n\n        let mut n_workers = pool.max_count();\n        if n_workers == 1 {\n            let qs: [*const _; 2] = [&q[0], ptr::null()];\n            let ps: [*const _; 2] = [&p[0], ptr::null()];\n            let mut out = MaybeUninit::<blst_fp12>::uninit();\n            unsafe {\n                blst_miller_loop_n(out.as_mut_ptr(), &qs[0], &ps[0], n_elems);\n                return out.assume_init();\n            }\n        }\n\n        let counter = Arc::new(AtomicUsize::new(0));\n        let stride = core::cmp::min((n_elems + n_workers - 1) / n_workers, 16);\n        n_workers = core::cmp::min((n_elems + stride - 1) / stride, n_workers);\n        let (tx, rx) = sync_channel(n_workers);\n        for _ in 0..n_workers {\n            let tx = tx.clone();\n            let counter = counter.clone();\n\n            pool.joined_execute(move || 
{\n                let mut acc = blst_fp12::default();\n                let mut tmp = MaybeUninit::<blst_fp12>::uninit();\n                let mut qs: [*const _; 2] = [ptr::null(), ptr::null()];\n                let mut ps: [*const _; 2] = [ptr::null(), ptr::null()];\n\n                loop {\n                    let work = counter.fetch_add(stride, Ordering::Relaxed);\n                    if work >= n_elems {\n                        break;\n                    }\n                    let n = core::cmp::min(n_elems - work, stride);\n                    qs[0] = &q[work];\n                    ps[0] = &p[work];\n                    unsafe {\n                        blst_miller_loop_n(tmp.as_mut_ptr(), &qs[0], &ps[0], n);\n                        acc *= tmp.assume_init();\n                    }\n                }\n\n                tx.send(acc).expect(\"disaster\");\n            });\n        }\n\n        let mut acc = rx.recv().unwrap();\n        for _ in 1..n_workers {\n            acc *= rx.recv().unwrap();\n        }\n\n        acc\n    }\n\n    pub fn final_exp(&self) -> Self {\n        let mut out = MaybeUninit::<blst_fp12>::uninit();\n        unsafe {\n            blst_final_exp(out.as_mut_ptr(), self);\n            out.assume_init()\n        }\n    }\n\n    pub fn in_group(&self) -> bool {\n        unsafe { blst_fp12_in_group(self) }\n    }\n\n    pub fn finalverify(a: &Self, b: &Self) -> bool {\n        unsafe { blst_fp12_finalverify(a, b) }\n    }\n\n    pub fn to_bendian(&self) -> [u8; 48 * 12] {\n        let mut out = MaybeUninit::<[u8; 48 * 12]>::uninit();\n        unsafe {\n            blst_bendian_from_fp12(out.as_mut_ptr() as *mut u8, self);\n            out.assume_init()\n        }\n    }\n}\n\nimpl blst_scalar {\n    pub fn hash_to(msg: &[u8], dst: &[u8]) -> Option<Self> {\n        unsafe {\n            let mut out = <Self>::default();\n            let mut elem = [0u8; 48];\n            blst_expand_message_xmd(\n                elem.as_mut_ptr(),\n     
           elem.len(),\n                msg.as_ptr(),\n                msg.len(),\n                dst.as_ptr(),\n                dst.len(),\n            );\n            if blst_scalar_from_be_bytes(&mut out, elem.as_ptr(), elem.len()) {\n                Some(out)\n            } else {\n                None\n            }\n        }\n    }\n}\n\n#[derive(Debug)]\npub struct Pairing {\n    v: Box<[u64]>,\n}\n\nimpl Pairing {\n    pub fn new(hash_or_encode: bool, dst: &[u8]) -> Self {\n        let v: Vec<u64> = vec![0; unsafe { blst_pairing_sizeof() } / 8];\n        let mut obj = Self {\n            v: v.into_boxed_slice(),\n        };\n        obj.init(hash_or_encode, dst);\n        obj\n    }\n\n    pub fn init(&mut self, hash_or_encode: bool, dst: &[u8]) {\n        unsafe {\n            blst_pairing_init(\n                self.ctx(),\n                hash_or_encode,\n                dst.as_ptr(),\n                dst.len(),\n            )\n        }\n    }\n    fn ctx(&mut self) -> *mut blst_pairing {\n        self.v.as_mut_ptr() as *mut blst_pairing\n    }\n    fn const_ctx(&self) -> *const blst_pairing {\n        self.v.as_ptr() as *const blst_pairing\n    }\n\n    pub fn aggregate(\n        &mut self,\n        pk: &dyn Any,\n        pk_validate: bool,\n        sig: &dyn Any,\n        sig_groupcheck: bool,\n        msg: &[u8],\n        aug: &[u8],\n    ) -> BLST_ERROR {\n        if pk.is::<blst_p1_affine>() {\n            unsafe {\n                blst_pairing_chk_n_aggr_pk_in_g1(\n                    self.ctx(),\n                    match pk.downcast_ref::<blst_p1_affine>() {\n                        Some(pk) => pk,\n                        None => ptr::null(),\n                    },\n                    pk_validate,\n                    match sig.downcast_ref::<blst_p2_affine>() {\n                        Some(sig) => sig,\n                        None => ptr::null(),\n                    },\n                    sig_groupcheck,\n                    
msg.as_ptr(),\n                    msg.len(),\n                    aug.as_ptr(),\n                    aug.len(),\n                )\n            }\n        } else if pk.is::<blst_p2_affine>() {\n            unsafe {\n                blst_pairing_chk_n_aggr_pk_in_g2(\n                    self.ctx(),\n                    match pk.downcast_ref::<blst_p2_affine>() {\n                        Some(pk) => pk,\n                        None => ptr::null(),\n                    },\n                    pk_validate,\n                    match sig.downcast_ref::<blst_p1_affine>() {\n                        Some(sig) => sig,\n                        None => ptr::null(),\n                    },\n                    sig_groupcheck,\n                    msg.as_ptr(),\n                    msg.len(),\n                    aug.as_ptr(),\n                    aug.len(),\n                )\n            }\n        } else {\n            panic!(\"whaaaa?\")\n        }\n    }\n\n    #[allow(clippy::too_many_arguments)]\n    pub fn mul_n_aggregate(\n        &mut self,\n        pk: &dyn Any,\n        pk_validate: bool,\n        sig: &dyn Any,\n        sig_groupcheck: bool,\n        scalar: &[u8],\n        nbits: usize,\n        msg: &[u8],\n        aug: &[u8],\n    ) -> BLST_ERROR {\n        if pk.is::<blst_p1_affine>() {\n            unsafe {\n                blst_pairing_chk_n_mul_n_aggr_pk_in_g1(\n                    self.ctx(),\n                    match pk.downcast_ref::<blst_p1_affine>() {\n                        Some(pk) => pk,\n                        None => ptr::null(),\n                    },\n                    pk_validate,\n                    match sig.downcast_ref::<blst_p2_affine>() {\n                        Some(sig) => sig,\n                        None => ptr::null(),\n                    },\n                    sig_groupcheck,\n                    scalar.as_ptr(),\n                    nbits,\n                    msg.as_ptr(),\n                    msg.len(),\n              
      aug.as_ptr(),\n                    aug.len(),\n                )\n            }\n        } else if pk.is::<blst_p2_affine>() {\n            unsafe {\n                blst_pairing_chk_n_mul_n_aggr_pk_in_g2(\n                    self.ctx(),\n                    match pk.downcast_ref::<blst_p2_affine>() {\n                        Some(pk) => pk,\n                        None => ptr::null(),\n                    },\n                    pk_validate,\n                    match sig.downcast_ref::<blst_p1_affine>() {\n                        Some(sig) => sig,\n                        None => ptr::null(),\n                    },\n                    sig_groupcheck,\n                    scalar.as_ptr(),\n                    nbits,\n                    msg.as_ptr(),\n                    msg.len(),\n                    aug.as_ptr(),\n                    aug.len(),\n                )\n            }\n        } else {\n            panic!(\"whaaaa?\")\n        }\n    }\n\n    pub fn aggregated(gtsig: &mut blst_fp12, sig: &dyn Any) {\n        if sig.is::<blst_p1_affine>() {\n            unsafe {\n                blst_aggregated_in_g1(\n                    gtsig,\n                    sig.downcast_ref::<blst_p1_affine>().unwrap(),\n                )\n            }\n        } else if sig.is::<blst_p2_affine>() {\n            unsafe {\n                blst_aggregated_in_g2(\n                    gtsig,\n                    sig.downcast_ref::<blst_p2_affine>().unwrap(),\n                )\n            }\n        } else {\n            panic!(\"whaaaa?\")\n        }\n    }\n\n    pub fn commit(&mut self) {\n        unsafe { blst_pairing_commit(self.ctx()) }\n    }\n\n    pub fn merge(&mut self, ctx1: &Self) -> BLST_ERROR {\n        unsafe { blst_pairing_merge(self.ctx(), ctx1.const_ctx()) }\n    }\n\n    pub fn finalverify(&self, gtsig: Option<&blst_fp12>) -> bool {\n        unsafe {\n            blst_pairing_finalverify(\n                self.const_ctx(),\n                match 
gtsig {\n                    Some(gtsig) => gtsig,\n                    None => ptr::null(),\n                },\n            )\n        }\n    }\n\n    pub fn raw_aggregate(&mut self, q: &blst_p2_affine, p: &blst_p1_affine) {\n        unsafe { blst_pairing_raw_aggregate(self.ctx(), q, p) }\n    }\n\n    pub fn as_fp12(&mut self) -> blst_fp12 {\n        unsafe { *blst_pairing_as_fp12(self.ctx()) }\n    }\n}\n\npub fn uniq(msgs: &[&[u8]]) -> bool {\n    let n_elems = msgs.len();\n\n    if n_elems == 1 {\n        return true;\n    } else if n_elems == 2 {\n        return msgs[0] != msgs[1];\n    }\n\n    let mut v: Vec<u64> = vec![0; unsafe { blst_uniq_sizeof(n_elems) } / 8];\n    let ctx = v.as_mut_ptr() as *mut blst_uniq;\n\n    unsafe { blst_uniq_init(ctx) };\n\n    for msg in msgs.iter() {\n        if !unsafe { blst_uniq_test(ctx, msg.as_ptr(), msg.len()) } {\n            return false;\n        }\n    }\n\n    true\n}\n\n#[cfg(feature = \"std\")]\npub fn print_bytes(bytes: &[u8], name: &str) {\n    print!(\"{} \", name);\n    for b in bytes.iter() {\n        print!(\"{:02x}\", b);\n    }\n    println!();\n}\n\nmacro_rules! 
sig_variant_impl {\n    (\n        $name:expr,\n        $pk:ty,\n        $pk_aff:ty,\n        $sig:ty,\n        $sig_aff:ty,\n        $sk_to_pk:ident,\n        $hash_or_encode:expr,\n        $hash_or_encode_to:ident,\n        $sign:ident,\n        $pk_eq:ident,\n        $sig_eq:ident,\n        $verify:ident,\n        $pk_in_group:ident,\n        $pk_to_aff:ident,\n        $pk_from_aff:ident,\n        $pk_ser:ident,\n        $pk_comp:ident,\n        $pk_deser:ident,\n        $pk_uncomp:ident,\n        $pk_comp_size:expr,\n        $pk_ser_size:expr,\n        $sig_in_group:ident,\n        $sig_to_aff:ident,\n        $sig_from_aff:ident,\n        $sig_ser:ident,\n        $sig_comp:ident,\n        $sig_deser:ident,\n        $sig_uncomp:ident,\n        $sig_comp_size:expr,\n        $sig_ser_size:expr,\n        $pk_add_or_dbl:ident,\n        $pk_add_or_dbl_aff:ident,\n        $pk_cneg:ident,\n        $sig_add_or_dbl:ident,\n        $sig_add_or_dbl_aff:ident,\n        $pk_is_inf:ident,\n        $sig_is_inf:ident,\n        $sig_aggr_in_group:ident,\n    ) => {\n        /// Secret Key\n        #[repr(transparent)]\n        #[derive(Default, Debug, Clone, Zeroize)]\n        #[zeroize(drop)]\n        pub struct SecretKey {\n            value: blst_scalar,\n        }\n\n        impl SecretKey {\n            /// Deterministically generate a secret key from key material\n            pub fn key_gen(\n                ikm: &[u8],\n                key_info: &[u8],\n            ) -> Result<Self, BLST_ERROR> {\n                if ikm.len() < 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_keygen(\n                        &mut sk.value,\n                        ikm.as_ptr(),\n                        ikm.len(),\n                        key_info.as_ptr(),\n                        key_info.len(),\n                    );\n                }\n         
       Ok(sk)\n            }\n\n            pub fn key_gen_v3(\n                ikm: &[u8],\n                key_info: &[u8],\n            ) -> Result<Self, BLST_ERROR> {\n                if ikm.len() < 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_keygen_v3(\n                        &mut sk.value,\n                        ikm.as_ptr(),\n                        ikm.len(),\n                        key_info.as_ptr(),\n                        key_info.len(),\n                    );\n                }\n                Ok(sk)\n            }\n\n            pub fn key_gen_v4_5(\n                ikm: &[u8],\n                salt: &[u8],\n                info: &[u8],\n            ) -> Result<Self, BLST_ERROR> {\n                if ikm.len() < 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_keygen_v4_5(\n                        &mut sk.value,\n                        ikm.as_ptr(),\n                        ikm.len(),\n                        salt.as_ptr(),\n                        salt.len(),\n                        info.as_ptr(),\n                        info.len(),\n                    );\n                }\n                Ok(sk)\n            }\n\n            pub fn key_gen_v5(\n                ikm: &[u8],\n                salt: &[u8],\n                info: &[u8],\n            ) -> Result<Self, BLST_ERROR> {\n                if ikm.len() < 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_keygen_v5(\n                        &mut sk.value,\n                        ikm.as_ptr(),\n                        ikm.len(),\n                        
salt.as_ptr(),\n                        salt.len(),\n                        info.as_ptr(),\n                        info.len(),\n                    );\n                }\n                Ok(sk)\n            }\n\n            pub fn derive_master_eip2333(\n                ikm: &[u8],\n            ) -> Result<Self, BLST_ERROR> {\n                if ikm.len() < 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_derive_master_eip2333(\n                        &mut sk.value,\n                        ikm.as_ptr(),\n                        ikm.len(),\n                    );\n                }\n                Ok(sk)\n            }\n\n            pub fn derive_child_eip2333(&self, child_index: u32) -> Self {\n                let mut sk = SecretKey::default();\n                unsafe {\n                    blst_derive_child_eip2333(\n                        &mut sk.value,\n                        &self.value,\n                        child_index,\n                    );\n                }\n                sk\n            }\n\n            // sk_to_pk\n            pub fn sk_to_pk(&self) -> PublicKey {\n                // TODO - would the user like the serialized/compressed pk as well?\n                let mut pk_aff = PublicKey::default();\n                //let mut pk_ser = [0u8; $pk_ser_size];\n\n                unsafe {\n                    $sk_to_pk(\n                        //pk_ser.as_mut_ptr(),\n                        ptr::null_mut(),\n                        &mut pk_aff.point,\n                        &self.value,\n                    );\n                }\n                pk_aff\n            }\n\n            // Sign\n            pub fn sign(\n                &self,\n                msg: &[u8],\n                dst: &[u8],\n                aug: &[u8],\n            ) -> Signature {\n                // TODO - would the user like 
the serialized/compressed sig as well?\n                let mut q = <$sig>::default();\n                let mut sig_aff = <$sig_aff>::default();\n                //let mut sig_ser = [0u8; $sig_ser_size];\n                unsafe {\n                    $hash_or_encode_to(\n                        &mut q,\n                        msg.as_ptr(),\n                        msg.len(),\n                        dst.as_ptr(),\n                        dst.len(),\n                        aug.as_ptr(),\n                        aug.len(),\n                    );\n                    $sign(ptr::null_mut(), &mut sig_aff, &q, &self.value);\n                }\n                Signature { point: sig_aff }\n            }\n\n            // TODO - formally speaking application is entitled to have\n            // ultimate control over secret key storage, which means that\n            // corresponding serialization/deserialization subroutines\n            // should accept reference to where to store the result, as\n            // opposed to returning one.\n\n            // serialize\n            pub fn serialize(&self) -> [u8; 32] {\n                let mut sk_out = [0; 32];\n                unsafe {\n                    blst_bendian_from_scalar(sk_out.as_mut_ptr(), &self.value);\n                }\n                sk_out\n            }\n\n            // deserialize\n            pub fn deserialize(sk_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                let mut sk = blst_scalar::default();\n                if sk_in.len() != 32 {\n                    return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                }\n                unsafe {\n                    blst_scalar_from_bendian(&mut sk, sk_in.as_ptr());\n                    if !blst_sk_check(&sk) {\n                        return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                    }\n                }\n                Ok(Self { value: sk })\n            }\n\n            pub fn to_bytes(&self) -> [u8; 32] {\n                
SecretKey::serialize(&self)\n            }\n\n            pub fn from_bytes(sk_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                SecretKey::deserialize(sk_in)\n            }\n        }\n\n        #[cfg(feature = \"serde-secret\")]\n        impl Serialize for SecretKey {\n            fn serialize<S: Serializer>(\n                &self,\n                ser: S,\n            ) -> Result<S::Ok, S::Error> {\n                let bytes = zeroize::Zeroizing::new(self.serialize());\n                ser.serialize_bytes(bytes.as_ref())\n            }\n        }\n\n        #[cfg(feature = \"serde-secret\")]\n        impl<'de> Deserialize<'de> for SecretKey {\n            fn deserialize<D: Deserializer<'de>>(\n                deser: D,\n            ) -> Result<Self, D::Error> {\n                let bytes: &[u8] = Deserialize::deserialize(deser)?;\n                Self::deserialize(bytes).map_err(|e| {\n                    <D::Error as serde::de::Error>::custom(format!(\"{:?}\", e))\n                })\n            }\n        }\n\n        // From<by-value> traits are not provided to discourage duplication\n        // of the secret key material.\n        impl<'a> From<&'a SecretKey> for &'a blst_scalar {\n            fn from(sk: &'a SecretKey) -> Self {\n                unsafe {\n                    transmute::<&SecretKey, Self>(sk)\n                }\n            }\n        }\n\n        impl<'a> core::convert::TryFrom<&'a blst_scalar> for &'a SecretKey {\n            type Error = BLST_ERROR;\n\n            fn try_from(sk: &'a blst_scalar) -> Result<Self, Self::Error> {\n                unsafe {\n                    if !blst_sk_check(sk) {\n                        return Err(BLST_ERROR::BLST_BAD_ENCODING);\n                    }\n                    Ok(transmute::<&blst_scalar, Self>(sk))\n                }\n            }\n        }\n\n        #[repr(transparent)]\n        #[derive(Default, Debug, Clone, Copy)]\n        pub struct PublicKey {\n            point: 
$pk_aff,\n        }\n\n        impl PublicKey {\n            // Core operations\n\n            // key_validate\n            pub fn validate(&self) -> Result<(), BLST_ERROR> {\n                unsafe {\n                    if $pk_is_inf(&self.point) {\n                        return Err(BLST_ERROR::BLST_PK_IS_INFINITY);\n                    }\n                    if !$pk_in_group(&self.point) {\n                        return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                    }\n                }\n                Ok(())\n            }\n\n            pub fn key_validate(key: &[u8]) -> Result<Self, BLST_ERROR> {\n                let pk = PublicKey::from_bytes(key)?;\n                pk.validate()?;\n                Ok(pk)\n            }\n\n            pub fn from_aggregate(agg_pk: &AggregatePublicKey) -> Self {\n                let mut pk_aff = <$pk_aff>::default();\n                unsafe {\n                    $pk_to_aff(&mut pk_aff, &agg_pk.point);\n                }\n                Self { point: pk_aff }\n            }\n\n            // Serdes\n\n            pub fn compress(&self) -> [u8; $pk_comp_size] {\n                let mut pk_comp = [0u8; $pk_comp_size];\n                unsafe {\n                    $pk_comp(pk_comp.as_mut_ptr(), &self.point);\n                }\n                pk_comp\n            }\n\n            pub fn serialize(&self) -> [u8; $pk_ser_size] {\n                let mut pk_out = [0u8; $pk_ser_size];\n                unsafe {\n                    $pk_ser(pk_out.as_mut_ptr(), &self.point);\n                }\n                pk_out\n            }\n\n            pub fn uncompress(pk_comp: &[u8]) -> Result<Self, BLST_ERROR> {\n                if pk_comp.len() == $pk_comp_size && (pk_comp[0] & 0x80) != 0 {\n                    let mut pk = <$pk_aff>::default();\n                    let err = unsafe { $pk_uncomp(&mut pk, pk_comp.as_ptr()) };\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return 
Err(err);\n                    }\n                    Ok(Self { point: pk })\n                } else {\n                    Err(BLST_ERROR::BLST_BAD_ENCODING)\n                }\n            }\n\n            pub fn deserialize(pk_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                if (pk_in.len() == $pk_ser_size && (pk_in[0] & 0x80) == 0)\n                    || (pk_in.len() == $pk_comp_size && (pk_in[0] & 0x80) != 0)\n                {\n                    let mut pk = <$pk_aff>::default();\n                    let err = unsafe { $pk_deser(&mut pk, pk_in.as_ptr()) };\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return Err(err);\n                    }\n                    Ok(Self { point: pk })\n                } else {\n                    Err(BLST_ERROR::BLST_BAD_ENCODING)\n                }\n            }\n\n            pub fn from_bytes(pk_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                PublicKey::deserialize(pk_in)\n            }\n\n            pub fn to_bytes(&self) -> [u8; $pk_comp_size] {\n                self.compress()\n            }\n        }\n\n        // Trait for equality comparisons which are equivalence relations.\n        //\n        // This means, that in addition to a == b and a != b being strict\n        // inverses, the equality must be reflexive, symmetric and transitive.\n        impl Eq for PublicKey {}\n\n        impl PartialEq for PublicKey {\n            fn eq(&self, other: &Self) -> bool {\n                unsafe { $pk_eq(&self.point, &other.point) }\n            }\n        }\n\n        #[cfg(feature = \"serde\")]\n        impl Serialize for PublicKey {\n            fn serialize<S: Serializer>(\n                &self,\n                ser: S,\n            ) -> Result<S::Ok, S::Error> {\n                ser.serialize_bytes(&self.serialize())\n            }\n        }\n\n        #[cfg(feature = \"serde\")]\n        impl<'de> Deserialize<'de> for PublicKey {\n            fn 
deserialize<D: Deserializer<'de>>(\n                deser: D,\n            ) -> Result<Self, D::Error> {\n                let bytes: &[u8] = Deserialize::deserialize(deser)?;\n                Self::deserialize(&bytes).map_err(|e| {\n                    <D::Error as serde::de::Error>::custom(format!(\"{:?}\", e))\n                })\n            }\n        }\n\n        impl From<PublicKey> for $pk_aff {\n            fn from(pk: PublicKey) -> Self {\n                pk.point\n            }\n        }\n\n        impl<'a> From<&'a PublicKey> for &'a $pk_aff {\n            fn from(pk: &'a PublicKey) -> Self {\n                &pk.point\n            }\n        }\n\n        impl From<$pk_aff> for PublicKey {\n            fn from(point: $pk_aff) -> Self {\n                Self { point }\n            }\n        }\n\n        #[repr(transparent)]\n        #[derive(Debug, Clone, Copy)]\n        pub struct AggregatePublicKey {\n            point: $pk,\n        }\n\n        impl AggregatePublicKey {\n            pub fn from_public_key(pk: &PublicKey) -> Self {\n                let mut agg_pk = <$pk>::default();\n                unsafe {\n                    $pk_from_aff(&mut agg_pk, &pk.point);\n                }\n                Self { point: agg_pk }\n            }\n\n            pub fn to_public_key(&self) -> PublicKey {\n                let mut pk = <$pk_aff>::default();\n                unsafe {\n                    $pk_to_aff(&mut pk, &self.point);\n                }\n                PublicKey { point: pk }\n            }\n\n            // Aggregate\n            pub fn aggregate(\n                pks: &[&PublicKey],\n                pks_validate: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                if pks.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                if pks_validate {\n                    pks[0].validate()?;\n                }\n                let mut agg_pk = 
AggregatePublicKey::from_public_key(pks[0]);\n                for s in pks.iter().skip(1) {\n                    if pks_validate {\n                        s.validate()?;\n                    }\n                    unsafe {\n                        $pk_add_or_dbl_aff(\n                            &mut agg_pk.point,\n                            &agg_pk.point,\n                            &s.point,\n                        );\n                    }\n                }\n                Ok(agg_pk)\n            }\n\n            pub fn aggregate_with_randomness(\n                pks: &[PublicKey],\n                randomness: &[u8],\n                nbits: usize,\n                pks_groupcheck: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                if pks.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                if pks_groupcheck {\n                    pks.validate()?;\n                }\n                Ok(pks.mult(randomness, nbits))\n            }\n\n            pub fn aggregate_serialized(\n                pks: &[&[u8]],\n                pks_validate: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                // TODO - threading\n                if pks.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                let mut pk = if pks_validate {\n                    PublicKey::key_validate(pks[0])?\n                } else {\n                    PublicKey::from_bytes(pks[0])?\n                };\n                let mut agg_pk = AggregatePublicKey::from_public_key(&pk);\n                for s in pks.iter().skip(1) {\n                    pk = if pks_validate {\n                        PublicKey::key_validate(s)?\n                    } else {\n                        PublicKey::from_bytes(s)?\n                    };\n                    unsafe {\n                        $pk_add_or_dbl_aff(\n                            &mut 
agg_pk.point,\n                            &agg_pk.point,\n                            &pk.point,\n                        );\n                    }\n                }\n                Ok(agg_pk)\n            }\n\n            pub fn add_aggregate(&mut self, agg_pk: &AggregatePublicKey) {\n                unsafe {\n                    $pk_add_or_dbl(&mut self.point, &self.point, &agg_pk.point);\n                }\n            }\n\n            pub fn sub_aggregate(&mut self, agg_pk: &AggregatePublicKey) {\n                unsafe {\n                    let mut tmp = agg_pk.clone();\n                    $pk_cneg(&mut tmp.point, true);\n                    $pk_add_or_dbl(&mut self.point, &self.point, &tmp.point);\n                }\n            }\n\n            pub fn add_public_key(\n                &mut self,\n                pk: &PublicKey,\n                pk_validate: bool,\n            ) -> Result<(), BLST_ERROR> {\n                if pk_validate {\n                    pk.validate()?;\n                }\n                unsafe {\n                    $pk_add_or_dbl_aff(&mut self.point, &self.point, &pk.point);\n                }\n                Ok(())\n            }\n        }\n\n        impl From<AggregatePublicKey> for $pk {\n            fn from(pk: AggregatePublicKey) -> Self {\n                pk.point\n            }\n        }\n\n        impl<'a> From<&'a AggregatePublicKey> for &'a $pk {\n            fn from(pk: &'a AggregatePublicKey) -> Self {\n                &pk.point\n            }\n        }\n\n        impl From<$pk> for AggregatePublicKey {\n            fn from(point: $pk) -> Self {\n                Self { point }\n            }\n        }\n\n        #[repr(transparent)]\n        #[derive(Debug, Clone, Copy)]\n        pub struct Signature {\n            point: $sig_aff,\n        }\n\n        impl Signature {\n            // sig_infcheck, check for infinity, is a way to avoid going\n            // into resource-consuming verification. 
Passing 'false' is\n            // always cryptographically safe, but an application might want\n            // to guard against obviously bogus individual[!] signatures.\n            pub fn validate(\n                &self,\n                sig_infcheck: bool,\n            ) -> Result<(), BLST_ERROR> {\n                unsafe {\n                    if sig_infcheck && $sig_is_inf(&self.point) {\n                        return Err(BLST_ERROR::BLST_PK_IS_INFINITY);\n                    }\n                    if !$sig_in_group(&self.point) {\n                        return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                    }\n                }\n                Ok(())\n            }\n\n            pub fn sig_validate(\n                sig: &[u8],\n                sig_infcheck: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                let sig = Signature::from_bytes(sig)?;\n                sig.validate(sig_infcheck)?;\n                Ok(sig)\n            }\n\n            pub fn verify(\n                &self,\n                sig_groupcheck: bool,\n                msg: &[u8],\n                dst: &[u8],\n                aug: &[u8],\n                pk: &PublicKey,\n                pk_validate: bool,\n            ) -> BLST_ERROR {\n                let aug_msg = [aug, msg].concat();\n                self.aggregate_verify(\n                    sig_groupcheck,\n                    &[aug_msg.as_slice()],\n                    dst,\n                    &[pk],\n                    pk_validate,\n                )\n            }\n\n            #[cfg(not(feature = \"std\"))]\n            pub fn aggregate_verify(\n                &self,\n                sig_groupcheck: bool,\n                msgs: &[&[u8]],\n                dst: &[u8],\n                pks: &[&PublicKey],\n                pks_validate: bool,\n            ) -> BLST_ERROR {\n                let n_elems = pks.len();\n                if n_elems == 0 || msgs.len() != n_elems {\n                    
return BLST_ERROR::BLST_VERIFY_FAIL;\n                }\n\n                let mut pairing = Pairing::new($hash_or_encode, dst);\n\n                let err = pairing.aggregate(\n                    &pks[0].point,\n                    pks_validate,\n                    &self.point,\n                    sig_groupcheck,\n                    &msgs[0],\n                    &[],\n                );\n                if err != BLST_ERROR::BLST_SUCCESS {\n                    return err;\n                }\n\n                for i in 1..n_elems {\n                    let err = pairing.aggregate(\n                        &pks[i].point,\n                        pks_validate,\n                        &unsafe { ptr::null::<$sig_aff>().as_ref() },\n                        false,\n                        &msgs[i],\n                        &[],\n                    );\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return err;\n                    }\n                }\n\n                pairing.commit();\n\n                if pairing.finalverify(None) {\n                    BLST_ERROR::BLST_SUCCESS\n                } else {\n                    BLST_ERROR::BLST_VERIFY_FAIL\n                }\n            }\n\n            #[cfg(feature = \"std\")]\n            pub fn aggregate_verify(\n                &self,\n                sig_groupcheck: bool,\n                msgs: &[&[u8]],\n                dst: &[u8],\n                pks: &[&PublicKey],\n                pks_validate: bool,\n            ) -> BLST_ERROR {\n                let n_elems = pks.len();\n                if n_elems == 0 || msgs.len() != n_elems {\n                    return BLST_ERROR::BLST_VERIFY_FAIL;\n                }\n\n                // TODO - check msg uniqueness?\n\n                let pool = mt::da_pool();\n                let counter = Arc::new(AtomicUsize::new(0));\n                let valid = Arc::new(AtomicBool::new(true));\n                let n_workers = 
core::cmp::min(pool.max_count(), n_elems);\n                let (tx, rx) = sync_channel(n_workers);\n                for _ in 0..n_workers {\n                    let tx = tx.clone();\n                    let counter = counter.clone();\n                    let valid = valid.clone();\n\n                    pool.joined_execute(move || {\n                        let mut pairing = Pairing::new($hash_or_encode, dst);\n\n                        while valid.load(Ordering::Relaxed) {\n                            let work = counter.fetch_add(1, Ordering::Relaxed);\n                            if work >= n_elems {\n                                break;\n                            }\n                            if pairing.aggregate(\n                                &pks[work].point,\n                                pks_validate,\n                                &unsafe { ptr::null::<$sig_aff>().as_ref() },\n                                false,\n                                &msgs[work],\n                                &[],\n                            ) != BLST_ERROR::BLST_SUCCESS\n                            {\n                                valid.store(false, Ordering::Relaxed);\n                                break;\n                            }\n                        }\n                        if valid.load(Ordering::Relaxed) {\n                            pairing.commit();\n                        }\n                        tx.send(pairing).expect(\"disaster\");\n                    });\n                }\n\n                if sig_groupcheck && valid.load(Ordering::Relaxed) {\n                    match self.validate(false) {\n                        Err(_err) => valid.store(false, Ordering::Relaxed),\n                        _ => (),\n                    }\n                }\n\n                let mut gtsig = blst_fp12::default();\n                if valid.load(Ordering::Relaxed) {\n                    Pairing::aggregated(&mut gtsig, &self.point);\n            
    }\n\n                let mut acc = rx.recv().unwrap();\n                for _ in 1..n_workers {\n                    acc.merge(&rx.recv().unwrap());\n                }\n\n                if valid.load(Ordering::Relaxed)\n                    && acc.finalverify(Some(&gtsig))\n                {\n                    BLST_ERROR::BLST_SUCCESS\n                } else {\n                    BLST_ERROR::BLST_VERIFY_FAIL\n                }\n            }\n\n            // pks are assumed to be verified for proof of possession,\n            // which implies that they are already group-checked\n            pub fn fast_aggregate_verify(\n                &self,\n                sig_groupcheck: bool,\n                msg: &[u8],\n                dst: &[u8],\n                pks: &[&PublicKey],\n            ) -> BLST_ERROR {\n                let agg_pk = match AggregatePublicKey::aggregate(pks, false) {\n                    Ok(agg_sig) => agg_sig,\n                    Err(err) => return err,\n                };\n                let pk = agg_pk.to_public_key();\n                self.aggregate_verify(\n                    sig_groupcheck,\n                    &[msg],\n                    dst,\n                    &[&pk],\n                    false,\n                )\n            }\n\n            pub fn fast_aggregate_verify_pre_aggregated(\n                &self,\n                sig_groupcheck: bool,\n                msg: &[u8],\n                dst: &[u8],\n                pk: &PublicKey,\n            ) -> BLST_ERROR {\n                self.aggregate_verify(sig_groupcheck, &[msg], dst, &[pk], false)\n            }\n\n            // https://ethresear.ch/t/fast-verification-of-multiple-bls-signatures/5407\n            #[cfg(feature = \"std\")]\n            #[allow(clippy::too_many_arguments)]\n            pub fn verify_multiple_aggregate_signatures(\n                msgs: &[&[u8]],\n                dst: &[u8],\n                pks: &[&PublicKey],\n                pks_validate: 
bool,\n                sigs: &[&Signature],\n                sigs_groupcheck: bool,\n                rands: &[blst_scalar],\n                rand_bits: usize,\n            ) -> BLST_ERROR {\n                let n_elems = pks.len();\n                if n_elems == 0\n                    || msgs.len() != n_elems\n                    || sigs.len() != n_elems\n                    || rands.len() != n_elems\n                {\n                    return BLST_ERROR::BLST_VERIFY_FAIL;\n                }\n\n                // TODO - check msg uniqueness?\n\n                let pool = mt::da_pool();\n                let counter = Arc::new(AtomicUsize::new(0));\n                let valid = Arc::new(AtomicBool::new(true));\n                let n_workers = core::cmp::min(pool.max_count(), n_elems);\n                let (tx, rx) = sync_channel(n_workers);\n                for _ in 0..n_workers {\n                    let tx = tx.clone();\n                    let counter = counter.clone();\n                    let valid = valid.clone();\n\n                    pool.joined_execute(move || {\n                        let mut pairing = Pairing::new($hash_or_encode, dst);\n\n                        // TODO - engage multi-point mul-n-add for larger\n                        // amount of inputs...\n                        while valid.load(Ordering::Relaxed) {\n                            let work = counter.fetch_add(1, Ordering::Relaxed);\n                            if work >= n_elems {\n                                break;\n                            }\n\n                            if pairing.mul_n_aggregate(\n                                &pks[work].point,\n                                pks_validate,\n                                &sigs[work].point,\n                                sigs_groupcheck,\n                                &rands[work].b,\n                                rand_bits,\n                                msgs[work],\n                                &[],\n      
                      ) != BLST_ERROR::BLST_SUCCESS\n                            {\n                                valid.store(false, Ordering::Relaxed);\n                                break;\n                            }\n                        }\n                        if valid.load(Ordering::Relaxed) {\n                            pairing.commit();\n                        }\n                        tx.send(pairing).expect(\"disaster\");\n                    });\n                }\n\n                let mut acc = rx.recv().unwrap();\n                for _ in 1..n_workers {\n                    acc.merge(&rx.recv().unwrap());\n                }\n\n                if valid.load(Ordering::Relaxed) && acc.finalverify(None) {\n                    BLST_ERROR::BLST_SUCCESS\n                } else {\n                    BLST_ERROR::BLST_VERIFY_FAIL\n                }\n            }\n\n            #[cfg(not(feature = \"std\"))]\n            #[allow(clippy::too_many_arguments)]\n            pub fn verify_multiple_aggregate_signatures(\n                msgs: &[&[u8]],\n                dst: &[u8],\n                pks: &[&PublicKey],\n                pks_validate: bool,\n                sigs: &[&Signature],\n                sigs_groupcheck: bool,\n                rands: &[blst_scalar],\n                rand_bits: usize,\n            ) -> BLST_ERROR {\n                let n_elems = pks.len();\n                if n_elems == 0\n                    || msgs.len() != n_elems\n                    || sigs.len() != n_elems\n                    || rands.len() != n_elems\n                {\n                    return BLST_ERROR::BLST_VERIFY_FAIL;\n                }\n\n                // TODO - check msg uniqueness?\n\n                let mut pairing = Pairing::new($hash_or_encode, dst);\n\n                for i in 0..n_elems {\n                    let err = pairing.mul_n_aggregate(\n                        &pks[i].point,\n                        pks_validate,\n                   
     &sigs[i].point,\n                        sigs_groupcheck,\n                        &rands[i].b,\n                        rand_bits,\n                        msgs[i],\n                        &[],\n                    );\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return err;\n                    }\n                }\n\n                pairing.commit();\n\n                if pairing.finalverify(None) {\n                    BLST_ERROR::BLST_SUCCESS\n                } else {\n                    BLST_ERROR::BLST_VERIFY_FAIL\n                }\n            }\n\n            pub fn from_aggregate(agg_sig: &AggregateSignature) -> Self {\n                let mut sig_aff = <$sig_aff>::default();\n                unsafe {\n                    $sig_to_aff(&mut sig_aff, &agg_sig.point);\n                }\n                Self { point: sig_aff }\n            }\n\n            pub fn compress(&self) -> [u8; $sig_comp_size] {\n                let mut sig_comp = [0; $sig_comp_size];\n                unsafe {\n                    $sig_comp(sig_comp.as_mut_ptr(), &self.point);\n                }\n                sig_comp\n            }\n\n            pub fn serialize(&self) -> [u8; $sig_ser_size] {\n                let mut sig_out = [0; $sig_ser_size];\n                unsafe {\n                    $sig_ser(sig_out.as_mut_ptr(), &self.point);\n                }\n                sig_out\n            }\n\n            pub fn uncompress(sig_comp: &[u8]) -> Result<Self, BLST_ERROR> {\n                if sig_comp.len() == $sig_comp_size && (sig_comp[0] & 0x80) != 0\n                {\n                    let mut sig = <$sig_aff>::default();\n                    let err =\n                        unsafe { $sig_uncomp(&mut sig, sig_comp.as_ptr()) };\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return Err(err);\n                    }\n                    Ok(Self { point: sig })\n                } else {\n       
             Err(BLST_ERROR::BLST_BAD_ENCODING)\n                }\n            }\n\n            pub fn deserialize(sig_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                if (sig_in.len() == $sig_ser_size && (sig_in[0] & 0x80) == 0)\n                    || (sig_in.len() == $sig_comp_size\n                        && (sig_in[0] & 0x80) != 0)\n                {\n                    let mut sig = <$sig_aff>::default();\n                    let err = unsafe { $sig_deser(&mut sig, sig_in.as_ptr()) };\n                    if err != BLST_ERROR::BLST_SUCCESS {\n                        return Err(err);\n                    }\n                    Ok(Self { point: sig })\n                } else {\n                    Err(BLST_ERROR::BLST_BAD_ENCODING)\n                }\n            }\n\n            pub fn from_bytes(sig_in: &[u8]) -> Result<Self, BLST_ERROR> {\n                Signature::deserialize(sig_in)\n            }\n\n            pub fn to_bytes(&self) -> [u8; $sig_comp_size] {\n                self.compress()\n            }\n\n            pub fn subgroup_check(&self) -> bool {\n                unsafe { $sig_in_group(&self.point) }\n            }\n        }\n\n        // Trait for equality comparisons which are equivalence relations.\n        //\n        // This means, that in addition to a == b and a != b being strict\n        // inverses, the equality must be reflexive, symmetric and transitive.\n        impl Eq for Signature {}\n\n        impl PartialEq for Signature {\n            fn eq(&self, other: &Self) -> bool {\n                unsafe { $sig_eq(&self.point, &other.point) }\n            }\n        }\n\n        #[cfg(feature = \"serde\")]\n        impl Serialize for Signature {\n            fn serialize<S: Serializer>(\n                &self,\n                ser: S,\n            ) -> Result<S::Ok, S::Error> {\n                ser.serialize_bytes(&self.serialize())\n            }\n        }\n\n        #[cfg(feature = \"serde\")]\n        impl<'de> 
Deserialize<'de> for Signature {\n            fn deserialize<D: Deserializer<'de>>(\n                deser: D,\n            ) -> Result<Self, D::Error> {\n                let bytes: &[u8] = Deserialize::deserialize(deser)?;\n                Self::deserialize(&bytes).map_err(|e| {\n                    <D::Error as serde::de::Error>::custom(format!(\"{:?}\", e))\n                })\n            }\n        }\n\n        impl From<Signature> for $sig_aff {\n            fn from(sig: Signature) -> Self {\n                sig.point\n            }\n        }\n\n        impl<'a> From<&'a Signature> for &'a $sig_aff {\n            fn from(sig: &'a Signature) -> Self {\n                &sig.point\n            }\n        }\n\n        impl From<$sig_aff> for Signature {\n            fn from(point: $sig_aff) -> Self {\n                Self { point }\n            }\n        }\n\n        #[repr(transparent)]\n        #[derive(Debug, Clone, Copy)]\n        pub struct AggregateSignature {\n            point: $sig,\n        }\n\n        impl AggregateSignature {\n            pub fn validate(&self) -> Result<(), BLST_ERROR> {\n                unsafe {\n                    if !$sig_aggr_in_group(&self.point) {\n                        return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                    }\n                }\n                Ok(())\n            }\n\n            pub fn from_signature(sig: &Signature) -> Self {\n                let mut agg_sig = <$sig>::default();\n                unsafe {\n                    $sig_from_aff(&mut agg_sig, &sig.point);\n                }\n                Self { point: agg_sig }\n            }\n\n            pub fn to_signature(&self) -> Signature {\n                let mut sig = <$sig_aff>::default();\n                unsafe {\n                    $sig_to_aff(&mut sig, &self.point);\n                }\n                Signature { point: sig }\n            }\n\n            // Aggregate\n            pub fn aggregate(\n                sigs: 
&[&Signature],\n                sigs_groupcheck: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                if sigs.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                if sigs_groupcheck {\n                    // We can't actually judge if input is individual or\n                    // aggregated signature, so we can't enforce infinity\n                    // check.\n                    sigs[0].validate(false)?;\n                }\n                let mut agg_sig = AggregateSignature::from_signature(sigs[0]);\n                for s in sigs.iter().skip(1) {\n                    if sigs_groupcheck {\n                        s.validate(false)?;\n                    }\n                    unsafe {\n                        $sig_add_or_dbl_aff(\n                            &mut agg_sig.point,\n                            &agg_sig.point,\n                            &s.point,\n                        );\n                    }\n                }\n                Ok(agg_sig)\n            }\n\n            pub fn aggregate_with_randomness(\n                sigs: &[Signature],\n                randomness: &[u8],\n                nbits: usize,\n                sigs_groupcheck: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                if sigs.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                if sigs_groupcheck {\n                    sigs.validate()?;\n                }\n                Ok(sigs.mult(randomness, nbits))\n            }\n\n            pub fn aggregate_serialized(\n                sigs: &[&[u8]],\n                sigs_groupcheck: bool,\n            ) -> Result<Self, BLST_ERROR> {\n                // TODO - threading\n                if sigs.len() == 0 {\n                    return Err(BLST_ERROR::BLST_AGGR_TYPE_MISMATCH);\n                }\n                let mut sig = if sigs_groupcheck {\n                    
Signature::sig_validate(sigs[0], false)?\n                } else {\n                    Signature::from_bytes(sigs[0])?\n                };\n                let mut agg_sig = AggregateSignature::from_signature(&sig);\n                for s in sigs.iter().skip(1) {\n                    sig = if sigs_groupcheck {\n                        Signature::sig_validate(s, false)?\n                    } else {\n                        Signature::from_bytes(s)?\n                    };\n                    unsafe {\n                        $sig_add_or_dbl_aff(\n                            &mut agg_sig.point,\n                            &agg_sig.point,\n                            &sig.point,\n                        );\n                    }\n                }\n                Ok(agg_sig)\n            }\n\n            pub fn add_aggregate(&mut self, agg_sig: &AggregateSignature) {\n                unsafe {\n                    $sig_add_or_dbl(\n                        &mut self.point,\n                        &self.point,\n                        &agg_sig.point,\n                    );\n                }\n            }\n\n            pub fn add_signature(\n                &mut self,\n                sig: &Signature,\n                sig_groupcheck: bool,\n            ) -> Result<(), BLST_ERROR> {\n                if sig_groupcheck {\n                    sig.validate(false)?;\n                }\n                unsafe {\n                    $sig_add_or_dbl_aff(\n                        &mut self.point,\n                        &self.point,\n                        &sig.point,\n                    );\n                }\n                Ok(())\n            }\n\n            pub fn subgroup_check(&self) -> bool {\n                unsafe { $sig_aggr_in_group(&self.point) }\n            }\n        }\n\n        impl From<AggregateSignature> for $sig {\n            fn from(sig: AggregateSignature) -> Self {\n                sig.point\n            }\n        }\n\n        impl<'a> 
From<&'a AggregateSignature> for &'a $sig {\n            fn from(sig: &'a AggregateSignature) -> Self {\n                &sig.point\n            }\n        }\n\n        impl From<$sig> for AggregateSignature {\n            fn from(point: $sig) -> Self {\n                Self { point }\n            }\n        }\n\n        impl MultiPoint for [PublicKey] {\n            type Output = AggregatePublicKey;\n\n            fn mult(&self, scalars: &[u8], nbits: usize) -> Self::Output {\n                Self::Output {\n                    point: unsafe { transmute::<&[_], &[$pk_aff]>(self) }\n                        .mult(scalars, nbits),\n                }\n            }\n\n            fn add(&self) -> Self::Output {\n                Self::Output {\n                    point: unsafe { transmute::<&[_], &[$pk_aff]>(self) }\n                        .add(),\n                }\n            }\n\n            fn validate(&self) -> Result<(), BLST_ERROR> {\n                unsafe { transmute::<&[_], &[$pk_aff]>(self) }.validate()\n            }\n        }\n\n        impl MultiPoint for [Signature] {\n            type Output = AggregateSignature;\n\n            fn mult(&self, scalars: &[u8], nbits: usize) -> Self::Output {\n                Self::Output {\n                    point: unsafe { transmute::<&[_], &[$sig_aff]>(self) }\n                        .mult(scalars, nbits),\n                }\n            }\n\n            fn add(&self) -> Self::Output {\n                Self::Output {\n                    point: unsafe { transmute::<&[_], &[$sig_aff]>(self) }\n                        .add(),\n                }\n            }\n\n            fn validate(&self) -> Result<(), BLST_ERROR> {\n                unsafe { transmute::<&[_], &[$sig_aff]>(self) }.validate()\n            }\n        }\n\n        #[cfg(test)]\n        mod tests {\n            use super::*;\n            use rand::{RngCore, SeedableRng};\n            use rand_chacha::ChaCha20Rng;\n\n            // Testing only - do 
not use for production\n            pub fn gen_random_key(\n                rng: &mut rand_chacha::ChaCha20Rng,\n            ) -> SecretKey {\n                let mut ikm = [0u8; 32];\n                rng.fill_bytes(&mut ikm);\n\n                let mut sk = <blst_scalar>::default();\n                unsafe {\n                    blst_keygen(&mut sk, ikm.as_ptr(), 32, ptr::null(), 0);\n                }\n                SecretKey { value: sk }\n            }\n\n            #[test]\n            fn test_sign_n_verify() {\n                let ikm: [u8; 32] = [\n                    0x93, 0xad, 0x7e, 0x65, 0xde, 0xad, 0x05, 0x2a, 0x08, 0x3a,\n                    0x91, 0x0c, 0x8b, 0x72, 0x85, 0x91, 0x46, 0x4c, 0xca, 0x56,\n                    0x60, 0x5b, 0xb0, 0x56, 0xed, 0xfe, 0x2b, 0x60, 0xa6, 0x3c,\n                    0x48, 0x99,\n                ];\n\n                let sk = SecretKey::key_gen(&ikm, &[]).unwrap();\n                let pk = sk.sk_to_pk();\n\n                let dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\";\n                let msg = b\"hello foo\";\n                let sig = sk.sign(msg, dst, &[]);\n\n                let err = sig.verify(true, msg, dst, &[], &pk, true);\n                assert_eq!(err, BLST_ERROR::BLST_SUCCESS);\n            }\n\n            #[test]\n            fn test_aggregate() {\n                let num_msgs = 10;\n                let dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_NUL_\";\n\n                let seed = [0u8; 32];\n                let mut rng = ChaCha20Rng::from_seed(seed);\n\n                let sks: Vec<_> =\n                    (0..num_msgs).map(|_| gen_random_key(&mut rng)).collect();\n                let pks =\n                    sks.iter().map(|sk| sk.sk_to_pk()).collect::<Vec<_>>();\n                let pks_refs: Vec<&PublicKey> =\n                    pks.iter().map(|pk| pk).collect();\n                let pks_rev: Vec<&PublicKey> =\n                    pks.iter().rev().map(|pk| 
pk).collect();\n\n                let pk_comp = pks[0].compress();\n                let pk_uncomp = PublicKey::uncompress(&pk_comp);\n                assert_eq!(pk_uncomp.is_ok(), true);\n\n                let mut msgs: Vec<Vec<u8>> = vec![vec![]; num_msgs];\n                for i in 0..num_msgs {\n                    let msg_len = (rng.next_u64() & 0x3F) + 1;\n                    msgs[i] = vec![0u8; msg_len as usize];\n                    rng.fill_bytes(&mut msgs[i]);\n                }\n\n                let msgs_refs: Vec<&[u8]> =\n                    msgs.iter().map(|m| m.as_slice()).collect();\n\n                let sigs = sks\n                    .iter()\n                    .zip(msgs.iter())\n                    .map(|(sk, m)| (sk.sign(m, dst, &[])))\n                    .collect::<Vec<Signature>>();\n\n                let mut errs = sigs\n                    .iter()\n                    .zip(msgs.iter())\n                    .zip(pks.iter())\n                    .map(|((s, m), pk)| (s.verify(true, m, dst, &[], pk, true)))\n                    .collect::<Vec<BLST_ERROR>>();\n                assert_eq!(errs, vec![BLST_ERROR::BLST_SUCCESS; num_msgs]);\n\n                // Swap message/public key pairs to create bad signature\n                errs = sigs\n                    .iter()\n                    .zip(msgs.iter())\n                    .zip(pks.iter().rev())\n                    .map(|((s, m), pk)| (s.verify(true, m, dst, &[], pk, true)))\n                    .collect::<Vec<BLST_ERROR>>();\n                assert_ne!(errs, vec![BLST_ERROR::BLST_SUCCESS; num_msgs]);\n\n                let sig_refs =\n                    sigs.iter().map(|s| s).collect::<Vec<&Signature>>();\n                let agg = match AggregateSignature::aggregate(&sig_refs, true) {\n                    Ok(agg) => agg,\n                    Err(err) => panic!(\"aggregate failure: {:?}\", err),\n                };\n\n                let agg_sig = agg.to_signature();\n                let 
mut result = agg_sig\n                    .aggregate_verify(false, &msgs_refs, dst, &pks_refs, false);\n                assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n\n                // Swap message/public key pairs to create bad signature\n                result = agg_sig\n                    .aggregate_verify(false, &msgs_refs, dst, &pks_rev, false);\n                assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n            }\n\n            #[test]\n            fn test_multiple_agg_sigs() {\n                let dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_\";\n                let num_pks_per_sig = 10;\n                let num_sigs = 10;\n\n                let seed = [0u8; 32];\n                let mut rng = ChaCha20Rng::from_seed(seed);\n\n                let mut msgs: Vec<Vec<u8>> = vec![vec![]; num_sigs];\n                let mut sigs: Vec<Signature> = Vec::with_capacity(num_sigs);\n                let mut pks: Vec<PublicKey> = Vec::with_capacity(num_sigs);\n                let mut rands: Vec<blst_scalar> = Vec::with_capacity(num_sigs);\n                for i in 0..num_sigs {\n                    // Create public keys\n                    let sks_i: Vec<_> = (0..num_pks_per_sig)\n                        .map(|_| gen_random_key(&mut rng))\n                        .collect();\n\n                    let pks_i = sks_i\n                        .iter()\n                        .map(|sk| sk.sk_to_pk())\n                        .collect::<Vec<_>>();\n                    let pks_refs_i: Vec<&PublicKey> =\n                        pks_i.iter().map(|pk| pk).collect();\n\n                    // Create random message for pks to all sign\n                    let msg_len = (rng.next_u64() & 0x3F) + 1;\n                    msgs[i] = vec![0u8; msg_len as usize];\n                    rng.fill_bytes(&mut msgs[i]);\n\n                    // Generate signature for each key pair\n                    let sigs_i = sks_i\n                        .iter()\n                       
 .map(|sk| sk.sign(&msgs[i], dst, &[]))\n                        .collect::<Vec<Signature>>();\n\n                    // Test each current single signature\n                    let errs = sigs_i\n                        .iter()\n                        .zip(pks_i.iter())\n                        .map(|(s, pk)| {\n                            (s.verify(true, &msgs[i], dst, &[], pk, true))\n                        })\n                        .collect::<Vec<BLST_ERROR>>();\n                    assert_eq!(\n                        errs,\n                        vec![BLST_ERROR::BLST_SUCCESS; num_pks_per_sig]\n                    );\n\n                    let sig_refs_i =\n                        sigs_i.iter().map(|s| s).collect::<Vec<&Signature>>();\n                    let agg_i =\n                        match AggregateSignature::aggregate(&sig_refs_i, false)\n                        {\n                            Ok(agg_i) => agg_i,\n                            Err(err) => panic!(\"aggregate failure: {:?}\", err),\n                        };\n\n                    // Test current aggregate signature\n                    sigs.push(agg_i.to_signature());\n                    let mut result = sigs[i].fast_aggregate_verify(\n                        false,\n                        &msgs[i],\n                        dst,\n                        &pks_refs_i,\n                    );\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n\n                    // negative test\n                    if i != 0 {\n                        result = sigs[i - 1].fast_aggregate_verify(\n                            false,\n                            &msgs[i],\n                            dst,\n                            &pks_refs_i,\n                        );\n                        assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n                    }\n\n                    // aggregate public keys and push into vec\n                    let agg_pk_i =\n                  
      match AggregatePublicKey::aggregate(&pks_refs_i, false)\n                        {\n                            Ok(agg_pk_i) => agg_pk_i,\n                            Err(err) => panic!(\"aggregate failure: {:?}\", err),\n                        };\n                    pks.push(agg_pk_i.to_public_key());\n\n                    // Test current aggregate signature with aggregated pks\n                    result = sigs[i].fast_aggregate_verify_pre_aggregated(\n                        false, &msgs[i], dst, &pks[i],\n                    );\n                    assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n\n                    // negative test\n                    if i != 0 {\n                        result = sigs[i - 1]\n                            .fast_aggregate_verify_pre_aggregated(\n                                false, &msgs[i], dst, &pks[i],\n                            );\n                        assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n                    }\n\n                    // create random values\n                    let mut vals = [0u64; 4];\n                    vals[0] = rng.next_u64();\n                    while vals[0] == 0 {\n                        // Reject zero as it is used for multiplication.\n                        vals[0] = rng.next_u64();\n                    }\n                    let mut rand_i = MaybeUninit::<blst_scalar>::uninit();\n                    unsafe {\n                        blst_scalar_from_uint64(\n                            rand_i.as_mut_ptr(),\n                            vals.as_ptr(),\n                        );\n                        rands.push(rand_i.assume_init());\n                    }\n                }\n\n                let msgs_refs: Vec<&[u8]> =\n                    msgs.iter().map(|m| m.as_slice()).collect();\n                let sig_refs =\n                    sigs.iter().map(|s| s).collect::<Vec<&Signature>>();\n                let pks_refs: Vec<&PublicKey> =\n                    
pks.iter().map(|pk| pk).collect();\n\n                let msgs_rev: Vec<&[u8]> =\n                    msgs.iter().rev().map(|m| m.as_slice()).collect();\n                let sig_rev =\n                    sigs.iter().rev().map(|s| s).collect::<Vec<&Signature>>();\n                let pks_rev: Vec<&PublicKey> =\n                    pks.iter().rev().map(|pk| pk).collect();\n\n                let mut result =\n                    Signature::verify_multiple_aggregate_signatures(\n                        &msgs_refs, dst, &pks_refs, false, &sig_refs, true,\n                        &rands, 64,\n                    );\n                assert_eq!(result, BLST_ERROR::BLST_SUCCESS);\n\n                // negative tests (use reverse msgs, pks, and sigs)\n                result = Signature::verify_multiple_aggregate_signatures(\n                    &msgs_rev, dst, &pks_refs, false, &sig_refs, true, &rands,\n                    64,\n                );\n                assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n\n                result = Signature::verify_multiple_aggregate_signatures(\n                    &msgs_refs, dst, &pks_rev, false, &sig_refs, true, &rands,\n                    64,\n                );\n                assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n\n                result = Signature::verify_multiple_aggregate_signatures(\n                    &msgs_refs, dst, &pks_refs, false, &sig_rev, true, &rands,\n                    64,\n                );\n                assert_ne!(result, BLST_ERROR::BLST_SUCCESS);\n            }\n\n            #[test]\n            fn test_serialization() {\n                let seed = [0u8; 32];\n                let mut rng = ChaCha20Rng::from_seed(seed);\n\n                let sk = gen_random_key(&mut rng);\n                let sk2 = gen_random_key(&mut rng);\n\n                let pk = sk.sk_to_pk();\n                let pk_comp = pk.compress();\n                let pk_ser = pk.serialize();\n\n                let pk_uncomp = 
PublicKey::uncompress(&pk_comp);\n                assert_eq!(pk_uncomp.is_ok(), true);\n                assert_eq!(pk_uncomp.unwrap(), pk);\n\n                let pk_deser = PublicKey::deserialize(&pk_ser);\n                assert_eq!(pk_deser.is_ok(), true);\n                assert_eq!(pk_deser.unwrap(), pk);\n\n                let pk2 = sk2.sk_to_pk();\n                let pk_comp2 = pk2.compress();\n                let pk_ser2 = pk2.serialize();\n\n                let pk_uncomp2 = PublicKey::uncompress(&pk_comp2);\n                assert_eq!(pk_uncomp2.is_ok(), true);\n                assert_eq!(pk_uncomp2.unwrap(), pk2);\n\n                let pk_deser2 = PublicKey::deserialize(&pk_ser2);\n                assert_eq!(pk_deser2.is_ok(), true);\n                assert_eq!(pk_deser2.unwrap(), pk2);\n\n                assert_ne!(pk, pk2);\n                assert_ne!(pk_uncomp.unwrap(), pk2);\n                assert_ne!(pk_deser.unwrap(), pk2);\n                assert_ne!(pk_uncomp2.unwrap(), pk);\n                assert_ne!(pk_deser2.unwrap(), pk);\n            }\n\n            #[cfg(feature = \"serde\")]\n            #[test]\n            fn test_serde() {\n                let seed = [0u8; 32];\n                let mut rng = ChaCha20Rng::from_seed(seed);\n\n                // generate a sk, pk, and sig, and make sure it signs\n                let sk = gen_random_key(&mut rng);\n                let pk = sk.sk_to_pk();\n                let sig = sk.sign(b\"asdf\", b\"qwer\", b\"zxcv\");\n                assert_eq!(\n                    sig.verify(true, b\"asdf\", b\"qwer\", b\"zxcv\", &pk, true),\n                    BLST_ERROR::BLST_SUCCESS\n                );\n\n                // roundtrip through serde\n                let pk_ser =\n                    rmp_serde::encode::to_vec_named(&pk).expect(\"ser pk\");\n                let sig_ser =\n                    rmp_serde::encode::to_vec_named(&sig).expect(\"ser sig\");\n                let pk_des: PublicKey =\n      
              rmp_serde::decode::from_slice(&pk_ser).expect(\"des pk\");\n                let sig_des: Signature =\n                    rmp_serde::decode::from_slice(&sig_ser).expect(\"des sig\");\n\n                // check that we got back the right things\n                assert_eq!(pk, pk_des);\n                assert_eq!(sig, sig_des);\n                assert_eq!(\n                    sig.verify(true, b\"asdf\", b\"qwer\", b\"zxcv\", &pk_des, true),\n                    BLST_ERROR::BLST_SUCCESS\n                );\n                assert_eq!(\n                    sig_des.verify(true, b\"asdf\", b\"qwer\", b\"zxcv\", &pk, true),\n                    BLST_ERROR::BLST_SUCCESS\n                );\n                assert_eq!(sk.sign(b\"asdf\", b\"qwer\", b\"zxcv\"), sig_des);\n\n                #[cfg(feature = \"serde-secret\")]\n                if true {\n                    let sk_ser =\n                        rmp_serde::encode::to_vec_named(&sk).expect(\"ser sk\");\n                    let sk_des: SecretKey =\n                        rmp_serde::decode::from_slice(&sk_ser).expect(\"des sk\");\n                    // BLS signatures are deterministic, so this establishes\n                    // that sk == sk_des\n                    assert_eq!(sk_des.sign(b\"asdf\", b\"qwer\", b\"zxcv\"), sig);\n                }\n            }\n\n            #[test]\n            fn test_multi_point() {\n                let dst = b\"BLS_SIG_BLS12381G2_XMD:SHA-256_SSWU_RO_POP_\";\n                let num_pks = 13;\n\n                let seed = [0u8; 32];\n                let mut rng = ChaCha20Rng::from_seed(seed);\n\n                // Create public keys\n                let sks: Vec<_> =\n                    (0..num_pks).map(|_| gen_random_key(&mut rng)).collect();\n\n                let pks =\n                    sks.iter().map(|sk| sk.sk_to_pk()).collect::<Vec<_>>();\n                let pks_refs: Vec<&PublicKey> =\n                    pks.iter().map(|pk| pk).collect();\n\n      
          // Create random message for pks to all sign\n                let msg_len = (rng.next_u64() & 0x3F) + 1;\n                let mut msg = vec![0u8; msg_len as usize];\n                rng.fill_bytes(&mut msg);\n\n                // Generate signature for each key pair\n                let sigs = sks\n                    .iter()\n                    .map(|sk| sk.sign(&msg, dst, &[]))\n                    .collect::<Vec<Signature>>();\n                let sigs_refs: Vec<&Signature> =\n                    sigs.iter().map(|s| s).collect();\n\n                // create random values\n                let mut rands: Vec<u8> = Vec::with_capacity(8 * num_pks);\n                for _ in 0..num_pks {\n                    let mut r = rng.next_u64();\n                    while r == 0 {\n                        // Reject zero as it is used for multiplication.\n                        r = rng.next_u64();\n                    }\n                    rands.extend_from_slice(&r.to_le_bytes());\n                }\n\n                // Sanity test each current single signature\n                let errs = sigs\n                    .iter()\n                    .zip(pks.iter())\n                    .map(|(s, pk)| (s.verify(true, &msg, dst, &[], pk, true)))\n                    .collect::<Vec<BLST_ERROR>>();\n                assert_eq!(errs, vec![BLST_ERROR::BLST_SUCCESS; num_pks]);\n\n                // sanity test aggregated signature\n                let agg_pk = AggregatePublicKey::aggregate(&pks_refs, false)\n                    .unwrap()\n                    .to_public_key();\n                let agg_sig = AggregateSignature::aggregate(&sigs_refs, false)\n                    .unwrap()\n                    .to_signature();\n                let err = agg_sig.verify(true, &msg, dst, &[], &agg_pk, true);\n                assert_eq!(err, BLST_ERROR::BLST_SUCCESS);\n\n                // test multi-point aggregation using add\n                let agg_pk = 
pks.add().to_public_key();\n                let agg_sig = sigs.add().to_signature();\n                let err = agg_sig.verify(true, &msg, dst, &[], &agg_pk, true);\n                assert_eq!(err, BLST_ERROR::BLST_SUCCESS);\n\n                // test multi-point aggregation using mult\n                let agg_pk = pks.mult(&rands, 64).to_public_key();\n                let agg_sig = sigs.mult(&rands, 64).to_signature();\n                let err = agg_sig.verify(true, &msg, dst, &[], &agg_pk, true);\n                assert_eq!(err, BLST_ERROR::BLST_SUCCESS);\n            }\n        }\n    };\n}\n\npub mod min_pk {\n    use super::*;\n\n    sig_variant_impl!(\n        \"MinPk\",\n        blst_p1,\n        blst_p1_affine,\n        blst_p2,\n        blst_p2_affine,\n        blst_sk_to_pk2_in_g1,\n        true,\n        blst_hash_to_g2,\n        blst_sign_pk2_in_g1,\n        blst_p1_affine_is_equal,\n        blst_p2_affine_is_equal,\n        blst_core_verify_pk_in_g1,\n        blst_p1_affine_in_g1,\n        blst_p1_to_affine,\n        blst_p1_from_affine,\n        blst_p1_affine_serialize,\n        blst_p1_affine_compress,\n        blst_p1_deserialize,\n        blst_p1_uncompress,\n        48,\n        96,\n        blst_p2_affine_in_g2,\n        blst_p2_to_affine,\n        blst_p2_from_affine,\n        blst_p2_affine_serialize,\n        blst_p2_affine_compress,\n        blst_p2_deserialize,\n        blst_p2_uncompress,\n        96,\n        192,\n        blst_p1_add_or_double,\n        blst_p1_add_or_double_affine,\n        blst_p1_cneg,\n        blst_p2_add_or_double,\n        blst_p2_add_or_double_affine,\n        blst_p1_affine_is_inf,\n        blst_p2_affine_is_inf,\n        blst_p2_in_g2,\n    );\n}\n\npub mod min_sig {\n    use super::*;\n\n    sig_variant_impl!(\n        \"MinSig\",\n        blst_p2,\n        blst_p2_affine,\n        blst_p1,\n        blst_p1_affine,\n        blst_sk_to_pk2_in_g2,\n        true,\n        blst_hash_to_g1,\n        
blst_sign_pk2_in_g2,\n        blst_p2_affine_is_equal,\n        blst_p1_affine_is_equal,\n        blst_core_verify_pk_in_g2,\n        blst_p2_affine_in_g2,\n        blst_p2_to_affine,\n        blst_p2_from_affine,\n        blst_p2_affine_serialize,\n        blst_p2_affine_compress,\n        blst_p2_deserialize,\n        blst_p2_uncompress,\n        96,\n        192,\n        blst_p1_affine_in_g1,\n        blst_p1_to_affine,\n        blst_p1_from_affine,\n        blst_p1_affine_serialize,\n        blst_p1_affine_compress,\n        blst_p1_deserialize,\n        blst_p1_uncompress,\n        48,\n        96,\n        blst_p2_add_or_double,\n        blst_p2_add_or_double_affine,\n        blst_p2_cneg,\n        blst_p1_add_or_double,\n        blst_p1_add_or_double_affine,\n        blst_p2_affine_is_inf,\n        blst_p1_affine_is_inf,\n        blst_p1_in_g1,\n    );\n}\n\npub trait MultiPoint {\n    type Output;\n\n    fn mult(&self, scalars: &[u8], nbits: usize) -> Self::Output;\n    fn add(&self) -> Self::Output;\n    fn validate(&self) -> Result<(), BLST_ERROR> {\n        Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP)\n    }\n}\n\n#[cfg(feature = \"std\")]\ninclude!(\"pippenger.rs\");\n\n#[cfg(not(feature = \"std\"))]\ninclude!(\"pippenger-no_std.rs\");\n\n#[cfg(test)]\nmod fp12_test {\n    use super::*;\n    use rand::{RngCore, SeedableRng};\n    use rand_chacha::ChaCha20Rng;\n\n    #[test]\n    fn miller_loop_n() {\n        const npoints: usize = 97;\n        const nbits: usize = 64;\n        const nbytes: usize = (nbits + 7) / 8;\n\n        let mut scalars = Box::new([0u8; nbytes * npoints]);\n        ChaCha20Rng::from_entropy().fill_bytes(scalars.as_mut());\n\n        let mut p1s: Vec<blst_p1> = Vec::with_capacity(npoints);\n        let mut p2s: Vec<blst_p2> = Vec::with_capacity(npoints);\n\n        unsafe {\n            p1s.set_len(npoints);\n            p2s.set_len(npoints);\n\n            for i in 0..npoints {\n                blst_p1_mult(\n                    &mut 
p1s[i],\n                    blst_p1_generator(),\n                    &scalars[i * nbytes],\n                    32,\n                );\n                blst_p2_mult(\n                    &mut p2s[i],\n                    blst_p2_generator(),\n                    &scalars[i * nbytes + 4],\n                    32,\n                );\n            }\n        }\n\n        let ps = p1_affines::from(&p1s);\n        let qs = p2_affines::from(&p2s);\n\n        let mut naive = blst_fp12::default();\n        for i in 0..npoints {\n            naive *= blst_fp12::miller_loop(&qs[i], &ps[i]);\n        }\n\n        assert_eq!(\n            naive,\n            blst_fp12::miller_loop_n(qs.as_slice(), ps.as_slice())\n        );\n    }\n}\n\n#[cfg(test)]\nmod sk_test {\n    use super::*;\n    use rand::{RngCore, SeedableRng};\n    use rand_chacha::ChaCha20Rng;\n\n    #[test]\n    fn inverse() {\n        let mut bytes = [0u8; 64];\n        ChaCha20Rng::from_entropy().fill_bytes(bytes.as_mut());\n\n        let mut sk = blst_scalar::default();\n        let mut p1 = blst_p1::default();\n        let mut p2 = blst_p2::default();\n\n        unsafe {\n            blst_scalar_from_be_bytes(&mut sk, bytes.as_ptr(), bytes.len());\n\n            blst_p1_mult(&mut p1, blst_p1_generator(), sk.b.as_ptr(), 255);\n            blst_sk_inverse(&mut sk, &sk);\n            blst_p1_mult(&mut p1, &p1, sk.b.as_ptr(), 255);\n\n            blst_p2_mult(&mut p2, blst_p2_generator(), sk.b.as_ptr(), 255);\n            blst_sk_inverse(&mut sk, &sk);\n            blst_p2_mult(&mut p2, &p2, sk.b.as_ptr(), 255);\n        }\n\n        assert_eq!(p1, unsafe { *blst_p1_generator() });\n        assert_eq!(p2, unsafe { *blst_p2_generator() });\n    }\n}\n"
  },
  {
    "path": "bindings/rust/src/pippenger-no_std.rs",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\nuse core::ops::{Index, IndexMut};\nuse core::slice::SliceIndex;\n\nmacro_rules! pippenger_mult_impl {\n    (\n        $points:ident,\n        $point:ty,\n        $point_affine:ty,\n        $to_affines:ident,\n        $scratch_sizeof:ident,\n        $multi_scalar_mult:ident,\n        $tile_mult:ident,\n        $add_or_double:ident,\n        $double:ident,\n        $test_mod:ident,\n        $generator:ident,\n        $mult:ident,\n        $add:ident,\n        $is_inf:ident,\n        $in_group:ident,\n    ) => {\n        pub struct $points {\n            points: Vec<$point_affine>,\n        }\n\n        impl<I: SliceIndex<[$point_affine]>> Index<I> for $points {\n            type Output = I::Output;\n\n            #[inline]\n            fn index(&self, i: I) -> &Self::Output {\n                &self.points[i]\n            }\n        }\n        impl<I: SliceIndex<[$point_affine]>> IndexMut<I> for $points {\n            #[inline]\n            fn index_mut(&mut self, i: I) -> &mut Self::Output {\n                &mut self.points[i]\n            }\n        }\n\n        impl $points {\n            #[inline]\n            pub fn as_slice(&self) -> &[$point_affine] {\n                self.points.as_slice()\n            }\n\n            pub fn from(points: &[$point]) -> Self {\n                let npoints = points.len();\n                let mut ret = Self {\n                    points: Vec::with_capacity(npoints),\n                };\n                #[allow(clippy::uninit_vec)]\n                unsafe { ret.points.set_len(npoints) };\n\n                let p: [*const $point; 2] = [&points[0], ptr::null()];\n                unsafe { $to_affines(&mut ret.points[0], &p[0], npoints) };\n                ret\n            }\n\n            #[inline]\n            pub fn mult(&self, scalars: &[u8], nbits: usize) -> 
$point {\n                self.as_slice().mult(scalars, nbits)\n            }\n\n            #[inline]\n            pub fn add(&self) -> $point {\n                self.as_slice().add()\n            }\n        }\n\n        impl MultiPoint for [$point_affine] {\n            type Output = $point;\n\n            fn mult(&self, scalars: &[u8], nbits: usize) -> $point {\n                let npoints = self.len();\n                let nbytes = (nbits + 7) / 8;\n\n                if scalars.len() < nbytes * npoints {\n                    panic!(\"scalars length mismatch\");\n                }\n\n                let p: [*const $point_affine; 2] = [&self[0], ptr::null()];\n                let s: [*const u8; 2] = [&scalars[0], ptr::null()];\n\n                let mut ret = <$point>::default();\n                unsafe {\n                    let mut scratch: Vec<u64> =\n                        Vec::with_capacity($scratch_sizeof(npoints) / 8);\n                    #[allow(clippy::uninit_vec)]\n                    scratch.set_len(scratch.capacity());\n                    $multi_scalar_mult(\n                        &mut ret,\n                        &p[0],\n                        npoints,\n                        &s[0],\n                        nbits,\n                        &mut scratch[0],\n                    );\n                }\n                ret\n            }\n\n            fn add(&self) -> $point {\n                let npoints = self.len();\n\n                let p: [*const _; 2] = [&self[0], ptr::null()];\n                let mut ret = <$point>::default();\n                unsafe { $add(&mut ret, &p[0], npoints) };\n\n                ret\n            }\n\n            fn validate(&self) -> Result<(), BLST_ERROR> {\n                for i in 0..self.len() {\n                    if unsafe { $is_inf(&self[i]) } {\n                        return Err(BLST_ERROR::BLST_PK_IS_INFINITY);\n                    }\n                    if !unsafe { $in_group(&self[i]) } {\n          
              return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                    }\n                }\n                Ok(())\n            }\n        }\n\n        #[cfg(test)]\n        pippenger_test_mod!(\n            $test_mod,\n            $points,\n            $point,\n            $add_or_double,\n            $generator,\n            $mult,\n        );\n    };\n}\n\n#[cfg(test)]\ninclude!(\"pippenger-test_mod.rs\");\n\npippenger_mult_impl!(\n    p1_affines,\n    blst_p1,\n    blst_p1_affine,\n    blst_p1s_to_affine,\n    blst_p1s_mult_pippenger_scratch_sizeof,\n    blst_p1s_mult_pippenger,\n    blst_p1s_tile_pippenger,\n    blst_p1_add_or_double,\n    blst_p1_double,\n    p1_multi_point,\n    blst_p1_generator,\n    blst_p1_mult,\n    blst_p1s_add,\n    blst_p1_affine_is_inf,\n    blst_p1_affine_in_g1,\n);\n\npippenger_mult_impl!(\n    p2_affines,\n    blst_p2,\n    blst_p2_affine,\n    blst_p2s_to_affine,\n    blst_p2s_mult_pippenger_scratch_sizeof,\n    blst_p2s_mult_pippenger,\n    blst_p2s_tile_pippenger,\n    blst_p2_add_or_double,\n    blst_p2_double,\n    p2_multi_point,\n    blst_p2_generator,\n    blst_p2_mult,\n    blst_p2s_add,\n    blst_p2_affine_is_inf,\n    blst_p2_affine_in_g2,\n);\n"
  },
  {
    "path": "bindings/rust/src/pippenger-test_mod.rs",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\nmacro_rules! pippenger_test_mod {\n    (\n        $test_mod:ident,\n        $points:ident,\n        $point:ty,\n        $add_or_double:ident,\n        $generator:ident,\n        $mult:ident,\n    ) => {\n        mod $test_mod {\n            use super::*;\n            use rand::{RngCore, SeedableRng};\n            use rand_chacha::ChaCha20Rng;\n\n            #[test]\n            fn test_mult() {\n                const npoints: usize = 2000;\n                const nbits: usize = 160;\n                const nbytes: usize = (nbits + 7) / 8;\n\n                let mut scalars = Box::new([0u8; nbytes * npoints]);\n                ChaCha20Rng::from_seed([0u8; 32]).fill_bytes(scalars.as_mut());\n\n                let mut points: Vec<$point> = Vec::with_capacity(npoints);\n                unsafe { points.set_len(points.capacity()) };\n\n                let mut naive = <$point>::default();\n                for i in 0..npoints {\n                    unsafe {\n                        let mut t = <$point>::default();\n                        $mult(\n                            &mut points[i],\n                            $generator(),\n                            &scalars[i * nbytes],\n                            core::cmp::min(32, nbits),\n                        );\n                        $mult(&mut t, &points[i], &scalars[i * nbytes], nbits);\n                        $add_or_double(&mut naive, &naive, &t);\n                    }\n                    if i < 27 {\n                        let points = $points::from(&points[0..i + 1]);\n                        assert_eq!(naive, points.mult(scalars.as_ref(), nbits));\n                    }\n                }\n\n                let points = $points::from(&points);\n\n                assert_eq!(naive, points.mult(scalars.as_ref(), nbits));\n            }\n\n          
  #[test]\n            fn test_add() {\n                const npoints: usize = 2000;\n                const nbits: usize = 32;\n                const nbytes: usize = (nbits + 7) / 8;\n\n                let mut scalars = Box::new([0u8; nbytes * npoints]);\n                ChaCha20Rng::from_seed([0u8; 32]).fill_bytes(scalars.as_mut());\n\n                let mut points: Vec<$point> = Vec::with_capacity(npoints);\n                unsafe { points.set_len(points.capacity()) };\n\n                let mut naive = <$point>::default();\n                for i in 0..npoints {\n                    unsafe {\n                        $mult(\n                            &mut points[i],\n                            $generator(),\n                            &scalars[i * nbytes],\n                            32,\n                        );\n                        $add_or_double(&mut naive, &naive, &points[i]);\n                    }\n                }\n\n                let points = $points::from(&points);\n                assert_eq!(naive, points.add());\n            }\n        }\n    };\n}\n"
  },
  {
    "path": "bindings/rust/src/pippenger.rs",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\nuse core::num::Wrapping;\nuse core::ops::{Index, IndexMut};\nuse core::slice::SliceIndex;\nuse std::sync::Barrier;\n\nstruct tile {\n    x: usize,\n    dx: usize,\n    y: usize,\n    dy: usize,\n}\n\n// Minimalist core::cell::Cell stand-in, but with Sync marker, which\n// makes it possible to pass it to multiple threads. It works, because\n// *here* each Cell is written only once and by just one thread.\n#[repr(transparent)]\nstruct Cell<T: ?Sized> {\n    value: T,\n}\nunsafe impl<T: ?Sized + Sync> Sync for Cell<T> {}\nimpl<T> Cell<T> {\n    pub fn as_ptr(&self) -> *mut T {\n        &self.value as *const T as *mut T\n    }\n}\n\nmacro_rules! pippenger_mult_impl {\n    (\n        $points:ident,\n        $point:ty,\n        $point_affine:ty,\n        $to_affines:ident,\n        $scratch_sizeof:ident,\n        $multi_scalar_mult:ident,\n        $tile_mult:ident,\n        $add_or_double:ident,\n        $double:ident,\n        $test_mod:ident,\n        $generator:ident,\n        $mult:ident,\n        $add:ident,\n        $is_inf:ident,\n        $in_group:ident,\n        $from_affine:ident,\n    ) => {\n        pub struct $points {\n            points: Vec<$point_affine>,\n        }\n\n        impl<I: SliceIndex<[$point_affine]>> Index<I> for $points {\n            type Output = I::Output;\n\n            #[inline]\n            fn index(&self, i: I) -> &Self::Output {\n                &self.points[i]\n            }\n        }\n        impl<I: SliceIndex<[$point_affine]>> IndexMut<I> for $points {\n            #[inline]\n            fn index_mut(&mut self, i: I) -> &mut Self::Output {\n                &mut self.points[i]\n            }\n        }\n\n        impl $points {\n            #[inline]\n            pub fn as_slice(&self) -> &[$point_affine] {\n                self.points.as_slice()\n            }\n\n 
           pub fn from(points: &[$point]) -> Self {\n                let npoints = points.len();\n                let mut ret = Self {\n                    points: Vec::with_capacity(npoints),\n                };\n                unsafe { ret.points.set_len(npoints) };\n\n                let pool = mt::da_pool();\n                let ncpus = pool.max_count();\n                if ncpus < 2 || npoints < 768 {\n                    let p: [*const $point; 2] = [&points[0], ptr::null()];\n                    unsafe { $to_affines(&mut ret.points[0], &p[0], npoints) };\n                    return ret;\n                }\n\n                let mut nslices = (npoints + 511) / 512;\n                nslices = core::cmp::min(nslices, ncpus);\n                let wg = Arc::new((Barrier::new(2), AtomicUsize::new(nslices)));\n\n                let (mut delta, mut rem) =\n                    (npoints / nslices + 1, Wrapping(npoints % nslices));\n                let mut x = 0usize;\n                while x < npoints {\n                    let out = &mut ret.points[x];\n                    let inp = &points[x];\n\n                    delta -= (rem == Wrapping(0)) as usize;\n                    rem -= Wrapping(1);\n                    x += delta;\n\n                    let wg = wg.clone();\n                    pool.joined_execute(move || {\n                        let p: [*const $point; 2] = [inp, ptr::null()];\n                        unsafe { $to_affines(out, &p[0], delta) };\n                        if wg.1.fetch_sub(1, Ordering::AcqRel) == 1 {\n                            wg.0.wait();\n                        }\n                    });\n                }\n                wg.0.wait();\n\n                ret\n            }\n\n            #[inline]\n            pub fn mult(&self, scalars: &[u8], nbits: usize) -> $point {\n                self.as_slice().mult(scalars, nbits)\n            }\n\n            #[inline]\n            pub fn add(&self) -> $point {\n                
self.as_slice().add()\n            }\n        }\n\n        impl MultiPoint for [$point_affine] {\n            type Output = $point;\n\n            fn mult(&self, scalars: &[u8], nbits: usize) -> $point {\n                let npoints = self.len();\n                let nbytes = (nbits + 7) / 8;\n\n                if scalars.len() < nbytes * npoints {\n                    panic!(\"scalars length mismatch\");\n                }\n\n                let pool = mt::da_pool();\n                let ncpus = pool.max_count();\n                if ncpus < 2 {\n                    let p: [*const $point_affine; 2] = [&self[0], ptr::null()];\n                    let s: [*const u8; 2] = [&scalars[0], ptr::null()];\n\n                    unsafe {\n                        let mut scratch: Vec<u64> =\n                            Vec::with_capacity($scratch_sizeof(npoints) / 8);\n                        #[allow(clippy::uninit_vec)]\n                        scratch.set_len(scratch.capacity());\n                        let mut ret = <$point>::default();\n                        $multi_scalar_mult(\n                            &mut ret,\n                            &p[0],\n                            npoints,\n                            &s[0],\n                            nbits,\n                            &mut scratch[0],\n                        );\n                        return ret;\n                    }\n                }\n\n                if npoints < 32 {\n                    let counter = Arc::new(AtomicUsize::new(0));\n                    let n_workers = core::cmp::min(ncpus, npoints);\n                    let (tx, rx) = sync_channel(n_workers);\n                    for _ in 0..n_workers {\n                        let tx = tx.clone();\n                        let counter = counter.clone();\n\n                        pool.joined_execute(move || {\n                            let mut acc = <$point>::default();\n                            let mut tmp = <$point>::default();\n     
                       let mut first = true;\n\n                            loop {\n                                let work =\n                                    counter.fetch_add(1, Ordering::Relaxed);\n                                if work >= npoints {\n                                    break;\n                                }\n\n                                unsafe {\n                                    $from_affine(&mut tmp, &self[work]);\n                                    let scalar = &scalars[nbytes * work];\n                                    if first {\n                                        $mult(&mut acc, &tmp, scalar, nbits);\n                                        first = false;\n                                    } else {\n                                        $mult(&mut tmp, &tmp, scalar, nbits);\n                                        $add_or_double(&mut acc, &acc, &tmp);\n                                    }\n                                }\n                            }\n\n                            tx.send(acc).expect(\"disaster\");\n                        });\n                    }\n\n                    let mut ret = rx.recv().expect(\"disaster\");\n                    for _ in 1..n_workers {\n                        let p = rx.recv().expect(\"disaster\");\n                        unsafe { $add_or_double(&mut ret, &ret, &p) };\n                    }\n\n                    return ret;\n                }\n\n                let (nx, ny, window) =\n                    breakdown(nbits, pippenger_window_size(npoints), ncpus);\n\n                // |grid[]| holds \"coordinates\" and place for result\n                let mut grid: Vec<(tile, Cell<$point>)> =\n                    Vec::with_capacity(nx * ny);\n                #[allow(clippy::uninit_vec)]\n                unsafe { grid.set_len(grid.capacity()) };\n                let dx = npoints / nx;\n                let mut y = window * (ny - 1);\n                let mut total = 
0usize;\n\n                while total < nx {\n                    grid[total].0.x = total * dx;\n                    grid[total].0.dx = dx;\n                    grid[total].0.y = y;\n                    grid[total].0.dy = nbits - y;\n                    total += 1;\n                }\n                grid[total - 1].0.dx = npoints - grid[total - 1].0.x;\n                while y != 0 {\n                    y -= window;\n                    for i in 0..nx {\n                        grid[total].0.x = grid[i].0.x;\n                        grid[total].0.dx = grid[i].0.dx;\n                        grid[total].0.y = y;\n                        grid[total].0.dy = window;\n                        total += 1;\n                    }\n                }\n                let grid = &grid[..];\n\n                let points = &self[..];\n                let sz = unsafe { $scratch_sizeof(0) / 8 };\n\n                let mut row_sync: Vec<AtomicUsize> = Vec::with_capacity(ny);\n                row_sync.resize_with(ny, Default::default);\n                let row_sync = Arc::new(row_sync);\n                let counter = Arc::new(AtomicUsize::new(0));\n                let n_workers = core::cmp::min(ncpus, total);\n                let (tx, rx) = sync_channel(n_workers);\n                for _ in 0..n_workers {\n                    let tx = tx.clone();\n                    let counter = counter.clone();\n                    let row_sync = row_sync.clone();\n\n                    pool.joined_execute(move || {\n                        let mut scratch = vec![0u64; sz << (window - 1)];\n                        let mut p: [*const $point_affine; 2] =\n                            [ptr::null(), ptr::null()];\n                        let mut s: [*const u8; 2] = [ptr::null(), ptr::null()];\n\n                        loop {\n                            let work = counter.fetch_add(1, Ordering::Relaxed);\n                            if work >= total {\n                                break;\n       
                     }\n                            let x = grid[work].0.x;\n                            let y = grid[work].0.y;\n\n                            p[0] = &points[x];\n                            s[0] = &scalars[x * nbytes];\n                            unsafe {\n                                $tile_mult(\n                                    grid[work].1.as_ptr(),\n                                    &p[0],\n                                    grid[work].0.dx,\n                                    &s[0],\n                                    nbits,\n                                    &mut scratch[0],\n                                    y,\n                                    window,\n                                );\n                            }\n                            if row_sync[y / window]\n                                .fetch_add(1, Ordering::AcqRel)\n                                == nx - 1\n                            {\n                                tx.send(y).expect(\"disaster\");\n                            }\n                        }\n                    });\n                }\n\n                let mut ret = <$point>::default();\n                let mut rows = vec![false; ny];\n                let mut row = 0usize;\n                for _ in 0..ny {\n                    let mut y = rx.recv().unwrap();\n                    rows[y / window] = true;\n                    while grid[row].0.y == y {\n                        while row < total && grid[row].0.y == y {\n                            unsafe {\n                                $add_or_double(\n                                    &mut ret,\n                                    &ret,\n                                    grid[row].1.as_ptr(),\n                                );\n                            }\n                            row += 1;\n                        }\n                        if y == 0 {\n                            break;\n                        }\n        
                for _ in 0..window {\n                            unsafe { $double(&mut ret, &ret) };\n                        }\n                        y -= window;\n                        if !rows[y / window] {\n                            break;\n                        }\n                    }\n                }\n                ret\n            }\n\n            fn add(&self) -> $point {\n                let npoints = self.len();\n\n                let pool = mt::da_pool();\n                let ncpus = pool.max_count();\n                if ncpus < 2 || npoints < 384 {\n                    let p: [*const _; 2] = [&self[0], ptr::null()];\n                    let mut ret = <$point>::default();\n                    unsafe { $add(&mut ret, &p[0], npoints) };\n                    return ret;\n                }\n\n                let counter = Arc::new(AtomicUsize::new(0));\n                let nchunks = (npoints + 255) / 256;\n                let chunk = npoints / nchunks + 1;\n                let n_workers = core::cmp::min(ncpus, nchunks);\n                let (tx, rx) = sync_channel(n_workers);\n                for _ in 0..n_workers {\n                    let tx = tx.clone();\n                    let counter = counter.clone();\n\n                    pool.joined_execute(move || {\n                        let mut acc = <$point>::default();\n                        let mut chunk = chunk;\n                        let mut p: [*const _; 2] = [ptr::null(), ptr::null()];\n\n                        loop {\n                            let work =\n                                counter.fetch_add(chunk, Ordering::Relaxed);\n                            if work >= npoints {\n                                break;\n                            }\n                            p[0] = &self[work];\n                            if work + chunk > npoints {\n                                chunk = npoints - work;\n                            }\n                            unsafe {\n    
                            let mut t = MaybeUninit::<$point>::uninit();\n                                $add(t.as_mut_ptr(), &p[0], chunk);\n                                $add_or_double(&mut acc, &acc, t.as_ptr());\n                            };\n                        }\n                        tx.send(acc).expect(\"disaster\");\n                    });\n                }\n\n                let mut ret = rx.recv().unwrap();\n                for _ in 1..n_workers {\n                    unsafe {\n                        $add_or_double(&mut ret, &ret, &rx.recv().unwrap())\n                    };\n                }\n\n                ret\n            }\n\n            fn validate(&self) -> Result<(), BLST_ERROR> {\n                fn check(point: &$point_affine) -> Result<(), BLST_ERROR> {\n                    if unsafe { $is_inf(point) } {\n                        return Err(BLST_ERROR::BLST_PK_IS_INFINITY);\n                    }\n                    if !unsafe { $in_group(point) } {\n                        return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                    }\n                    Ok(())\n                }\n\n                let npoints = self.len();\n\n                let pool = mt::da_pool();\n                let n_workers = core::cmp::min(npoints, pool.max_count());\n                if n_workers < 2 {\n                    for i in 0..npoints {\n                        check(&self[i])?\n                    }\n                    return Ok(())\n                }\n\n                let counter = Arc::new(AtomicUsize::new(0));\n                let valid = Arc::new(AtomicBool::new(true));\n                let wg =\n                    Arc::new((Barrier::new(2), AtomicUsize::new(n_workers)));\n\n                for _ in 0..n_workers {\n                    let counter = counter.clone();\n                    let valid = valid.clone();\n                    let wg = wg.clone();\n\n                    pool.joined_execute(move || {\n                   
     while valid.load(Ordering::Relaxed) {\n                            let work = counter.fetch_add(1, Ordering::Relaxed);\n                            if work >= npoints {\n                                break;\n                            }\n\n                            if check(&self[work]).is_err() {\n                                valid.store(false, Ordering::Relaxed);\n                                break;\n                            }\n                        }\n\n                        if wg.1.fetch_sub(1, Ordering::AcqRel) == 1 {\n                            wg.0.wait();\n                        }\n                    });\n                }\n\n                wg.0.wait();\n\n                if valid.load(Ordering::Relaxed) {\n                    return Ok(());\n                } else {\n                    return Err(BLST_ERROR::BLST_POINT_NOT_IN_GROUP);\n                }\n            }\n        }\n\n        #[cfg(test)]\n        pippenger_test_mod!(\n            $test_mod,\n            $points,\n            $point,\n            $add_or_double,\n            $generator,\n            $mult,\n        );\n    };\n}\n\n#[cfg(test)]\ninclude!(\"pippenger-test_mod.rs\");\n\npippenger_mult_impl!(\n    p1_affines,\n    blst_p1,\n    blst_p1_affine,\n    blst_p1s_to_affine,\n    blst_p1s_mult_pippenger_scratch_sizeof,\n    blst_p1s_mult_pippenger,\n    blst_p1s_tile_pippenger,\n    blst_p1_add_or_double,\n    blst_p1_double,\n    p1_multi_point,\n    blst_p1_generator,\n    blst_p1_mult,\n    blst_p1s_add,\n    blst_p1_affine_is_inf,\n    blst_p1_affine_in_g1,\n    blst_p1_from_affine,\n);\n\npippenger_mult_impl!(\n    p2_affines,\n    blst_p2,\n    blst_p2_affine,\n    blst_p2s_to_affine,\n    blst_p2s_mult_pippenger_scratch_sizeof,\n    blst_p2s_mult_pippenger,\n    blst_p2s_tile_pippenger,\n    blst_p2_add_or_double,\n    blst_p2_double,\n    p2_multi_point,\n    blst_p2_generator,\n    blst_p2_mult,\n    blst_p2s_add,\n    blst_p2_affine_is_inf,\n    
blst_p2_affine_in_g2,\n    blst_p2_from_affine,\n);\n\nfn num_bits(l: usize) -> usize {\n    8 * core::mem::size_of_val(&l) - l.leading_zeros() as usize\n}\n\nfn breakdown(\n    nbits: usize,\n    window: usize,\n    ncpus: usize,\n) -> (usize, usize, usize) {\n    let mut nx: usize;\n    let mut wnd: usize;\n\n    if nbits > window * ncpus {\n        nx = 1;\n        wnd = num_bits(ncpus / 4);\n        if (window + wnd) > 18 {\n            wnd = window - wnd;\n        } else {\n            wnd = (nbits / window + ncpus - 1) / ncpus;\n            if (nbits / (window + 1) + ncpus - 1) / ncpus < wnd {\n                wnd = window + 1;\n            } else {\n                wnd = window;\n            }\n        }\n    } else {\n        nx = 2;\n        wnd = window - 2;\n        while (nbits / wnd + 1) * nx < ncpus {\n            nx += 1;\n            wnd = window - num_bits(3 * nx / 2);\n        }\n        nx -= 1;\n        wnd = window - num_bits(3 * nx / 2);\n    }\n    let ny = nbits / wnd + 1;\n    wnd = nbits / ny + 1;\n\n    (nx, ny, wnd)\n}\n\nfn pippenger_window_size(npoints: usize) -> usize {\n    let wbits = num_bits(npoints);\n\n    if wbits > 13 {\n        return wbits - 4;\n    }\n    if wbits > 5 {\n        return wbits - 3;\n    }\n    2\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/BLS12381G1_XMD_SHA-256_SSWU_NU_.json",
    "content": "{\n  \"L\": \"0x40\",\n  \"Z\": \"0xb\",\n  \"ciphersuite\": \"BLS12381G1_XMD:SHA-256_SSWU_NU_\",\n  \"curve\": \"BLS12-381 G1\",\n  \"dst\": \"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_NU_\",\n  \"expand\": \"XMD\",\n  \"field\": {\n    \"m\": \"0x1\",\n    \"p\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab\"\n  },\n  \"hash\": \"sha256\",\n  \"k\": \"0x80\",\n  \"map\": {\n    \"name\": \"SSWU\"\n  },\n  \"randomOracle\": false,\n  \"vectors\": [\n    {\n      \"P\": {\n        \"x\": \"0x184bb665c37ff561a89ec2122dd343f20e0f4cbcaec84e3c3052ea81d1834e192c426074b02ed3dca4e7676ce4ce48ba\",\n        \"y\": \"0x04407b8d35af4dacc809927071fc0405218f1401a6d15af775810e4e460064bcc9468beeba82fdc751be70476c888bf3\"\n      },\n      \"Q\": {\n        \"x\": \"0x11398d3b324810a1b093f8e35aa8571cced95858207e7f49c4fd74656096d61d8a2f9a23cdb18a4dd11cd1d66f41f709\",\n        \"y\": \"0x19316b6fb2ba7717355d5d66a361899057e1e84a6823039efc7beccefe09d023fb2713b1c415fcf278eb0c39a89b4f72\"\n      },\n      \"msg\": \"\",\n      \"u\": [\n        \"0x156c8a6a2c184569d69a76be144b5cdc5141d2d2ca4fe341f011e25e3969c55ad9e9b9ce2eb833c81a908e5fa4ac5f03\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x009769f3ab59bfd551d53a5f846b9984c59b97d6842b20a2c565baa167945e3d026a3755b6345df8ec7e6acb6868ae6d\",\n        \"y\": \"0x1532c00cf61aa3d0ce3e5aa20c3b531a2abd2c770a790a2613818303c6b830ffc0ecf6c357af3317b9575c567f11cd2c\"\n      },\n      \"Q\": {\n        \"x\": \"0x1998321bc27ff6d71df3051b5aec12ff47363d81a5e9d2dff55f444f6ca7e7d6af45c56fd029c58237c266ef5cda5254\",\n        \"y\": \"0x034d274476c6307ae584f951c82e7ea85b84f72d28f4d6471732356121af8d62a49bc263e8eb913a6cf6f125995514ee\"\n      },\n      \"msg\": \"abc\",\n      \"u\": [\n        \"0x147e1ed29f06e4c5079b9d14fc89d2820d32419b990c1c7bb7dbea2a36a045124b31ffbde7c99329c05c559af1c6cc82\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": 
\"0x1974dbb8e6b5d20b84df7e625e2fbfecb2cdb5f77d5eae5fb2955e5ce7313cae8364bc2fff520a6c25619739c6bdcb6a\",\n        \"y\": \"0x15f9897e11c6441eaa676de141c8d83c37aab8667173cbe1dfd6de74d11861b961dccebcd9d289ac633455dfcc7013a3\"\n      },\n      \"Q\": {\n        \"x\": \"0x17d502fa43bd6a4cad2859049a0c3ecefd60240d129be65da271a4c03a9c38fa78163b9d2a919d2beb57df7d609b4919\",\n        \"y\": \"0x109019902ae93a8732abecf2ff7fecd2e4e305eb91f41c9c3267f16b6c19de138c7272947f25512745da6c466cdfd1ac\"\n      },\n      \"msg\": \"abcdef0123456789\",\n      \"u\": [\n        \"0x04090815ad598a06897dd89bcda860f25837d54e897298ce31e6947378134d3761dc59a572154963e8c954919ecfa82d\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x0a7a047c4a8397b3446450642c2ac64d7239b61872c9ae7a59707a8f4f950f101e766afe58223b3bff3a19a7f754027c\",\n        \"y\": \"0x1383aebba1e4327ccff7cf9912bda0dbc77de048b71ef8c8a81111d71dc33c5e3aa6edee9cf6f5fe525d50cc50b77cc9\"\n      },\n      \"Q\": {\n        \"x\": \"0x112eb92dd2b3aa9cd38b08de4bef603f2f9fb0ca226030626a9a2e47ad1e9847fe0a5ed13766c339e38f514bba143b21\",\n        \"y\": \"0x17542ce2f8d0a54f2c5ba8c4b14e10b22d5bcd7bae2af3c965c8c872b571058c720eac448276c99967ded2bf124490e1\"\n      },\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"u\": [\n        \"0x08dccd088ca55b8bfbc96fb50bb25c592faa867a8bb78d4e94a8cc2c92306190244532e91feba2b7fed977e3c3bb5a1f\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x0e7a16a975904f131682edbb03d9560d3e48214c9986bd50417a77108d13dc957500edf96462a3d01e62dc6cd468ef11\",\n        \"y\": \"0x0ae89e677711d05c30a48d6d75e76ca9fb70fe06c6dd6ff988683d89ccde29ac7d46c53bb97a59b1901abf1db66052db\"\n      },\n      \"Q\": {\n        \"x\": \"0x1775d400a1bacc1c39c355da7e96d2d1c97baa9430c4a3476881f8521c09a01f921f592607961efc99c4cd46bd78ca19\",\n        \"y\": 
\"0x1109b5d59f65964315de65a7a143e86eabc053104ed289cf480949317a5685fad7254ff8e7fe6d24d3104e5d55ad6370\"\n      },\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"u\": [\n        \"0x0dd824886d2123a96447f6c56e3a3fa992fbfefdba17b6673f9f630ff19e4d326529db37e1c1be43f905bf9202e0278d\"\n      ]\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/BLS12381G1_XMD_SHA-256_SSWU_RO_.json",
    "content": "{\n  \"L\": \"0x40\",\n  \"Z\": \"0xb\",\n  \"ciphersuite\": \"BLS12381G1_XMD:SHA-256_SSWU_RO_\",\n  \"curve\": \"BLS12-381 G1\",\n  \"dst\": \"QUUX-V01-CS02-with-BLS12381G1_XMD:SHA-256_SSWU_RO_\",\n  \"expand\": \"XMD\",\n  \"field\": {\n    \"m\": \"0x1\",\n    \"p\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab\"\n  },\n  \"hash\": \"sha256\",\n  \"k\": \"0x80\",\n  \"map\": {\n    \"name\": \"SSWU\"\n  },\n  \"randomOracle\": true,\n  \"vectors\": [\n    {\n      \"P\": {\n        \"x\": \"0x052926add2207b76ca4fa57a8734416c8dc95e24501772c814278700eed6d1e4e8cf62d9c09db0fac349612b759e79a1\",\n        \"y\": \"0x08ba738453bfed09cb546dbb0783dbb3a5f1f566ed67bb6be0e8c67e2e81a4cc68ee29813bb7994998f3eae0c9c6a265\"\n      },\n      \"Q0\": {\n        \"x\": \"0x11a3cce7e1d90975990066b2f2643b9540fa40d6137780df4e753a8054d07580db3b7f1f03396333d4a359d1fe3766fe\",\n        \"y\": \"0x0eeaf6d794e479e270da10fdaf768db4c96b650a74518fc67b04b03927754bac66f3ac720404f339ecdcc028afa091b7\"\n      },\n      \"Q1\": {\n        \"x\": \"0x160003aaf1632b13396dbad518effa00fff532f604de1a7fc2082ff4cb0afa2d63b2c32da1bef2bf6c5ca62dc6b72f9c\",\n        \"y\": \"0x0d8bb2d14e20cf9f6036152ed386d79189415b6d015a20133acb4e019139b94e9c146aaad5817f866c95d609a361735e\"\n      },\n      \"msg\": \"\",\n      \"u\": [\n        \"0x0ba14bd907ad64a016293ee7c2d276b8eae71f25a4b941eece7b0d89f17f75cb3ae5438a614fb61d6835ad59f29c564f\",\n        \"0x019b9bd7979f12657976de2884c7cce192b82c177c80e0ec604436a7f538d231552f0d96d9f7babe5fa3b19b3ff25ac9\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x03567bc5ef9c690c2ab2ecdf6a96ef1c139cc0b2f284dca0a9a7943388a49a3aee664ba5379a7655d3c68900be2f6903\",\n        \"y\": \"0x0b9c15f3fe6e5cf4211f346271d7b01c8f3b28be689c8429c85b67af215533311f0b8dfaaa154fa6b88176c229f2885d\"\n      },\n      \"Q0\": {\n        \"x\": 
\"0x125435adce8e1cbd1c803e7123f45392dc6e326d292499c2c45c5865985fd74fe8f042ecdeeec5ecac80680d04317d80\",\n        \"y\": \"0x0e8828948c989126595ee30e4f7c931cbd6f4570735624fd25aef2fa41d3f79cfb4b4ee7b7e55a8ce013af2a5ba20bf2\"\n      },\n      \"Q1\": {\n        \"x\": \"0x11def93719829ecda3b46aa8c31fc3ac9c34b428982b898369608e4f042babee6c77ab9218aad5c87ba785481eff8ae4\",\n        \"y\": \"0x0007c9cef122ccf2efd233d6eb9bfc680aa276652b0661f4f820a653cec1db7ff69899f8e52b8e92b025a12c822a6ce6\"\n      },\n      \"msg\": \"abc\",\n      \"u\": [\n        \"0x0d921c33f2bad966478a03ca35d05719bdf92d347557ea166e5bba579eea9b83e9afa5c088573c2281410369fbd32951\",\n        \"0x003574a00b109ada2f26a37a91f9d1e740dffd8d69ec0c35e1e9f4652c7dba61123e9dd2e76c655d956e2b3462611139\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x11e0b079dea29a68f0383ee94fed1b940995272407e3bb916bbf268c263ddd57a6a27200a784cbc248e84f357ce82d98\",\n        \"y\": \"0x03a87ae2caf14e8ee52e51fa2ed8eefe80f02457004ba4d486d6aa1f517c0889501dc7413753f9599b099ebcbbd2d709\"\n      },\n      \"Q0\": {\n        \"x\": \"0x08834484878c217682f6d09a4b51444802fdba3d7f2df9903a0ddadb92130ebbfa807fffa0eabf257d7b48272410afff\",\n        \"y\": \"0x0b318f7ecf77f45a0f038e62d7098221d2dbbca2a394164e2e3fe953dc714ac2cde412d8f2d7f0c03b259e6795a2508e\"\n      },\n      \"Q1\": {\n        \"x\": \"0x158418ed6b27e2549f05531a8281b5822b31c3bf3144277fbb977f8d6e2694fedceb7011b3c2b192f23e2a44b2bd106e\",\n        \"y\": \"0x1879074f344471fac5f839e2b4920789643c075792bec5af4282c73f7941cda5aa77b00085eb10e206171b9787c4169f\"\n      },\n      \"msg\": \"abcdef0123456789\",\n      \"u\": [\n        \"0x062d1865eb80ebfa73dcfc45db1ad4266b9f3a93219976a3790ab8d52d3e5f1e62f3b01795e36834b17b70e7b76246d4\",\n        \"0x0cdc3e2f271f29c4ff75020857ce6c5d36008c9b48385ea2f2bf6f96f428a3deb798aa033cd482d1cdc8b30178b08e3a\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": 
\"0x15f68eaa693b95ccb85215dc65fa81038d69629f70aeee0d0f677cf22285e7bf58d7cb86eefe8f2e9bc3f8cb84fac488\",\n        \"y\": \"0x1807a1d50c29f430b8cafc4f8638dfeeadf51211e1602a5f184443076715f91bb90a48ba1e370edce6ae1062f5e6dd38\"\n      },\n      \"Q0\": {\n        \"x\": \"0x0cbd7f84ad2c99643fea7a7ac8f52d63d66cefa06d9a56148e58b984b3dd25e1f41ff47154543343949c64f88d48a710\",\n        \"y\": \"0x052c00e4ed52d000d94881a5638ae9274d3efc8bc77bc0e5c650de04a000b2c334a9e80b85282a00f3148dfdface0865\"\n      },\n      \"Q1\": {\n        \"x\": \"0x06493fb68f0d513af08be0372f849436a787e7b701ae31cb964d968021d6ba6bd7d26a38aaa5a68e8c21a6b17dc8b579\",\n        \"y\": \"0x02e98f2ccf5802b05ffaac7c20018bc0c0b2fd580216c4aa2275d2909dc0c92d0d0bdc979226adeb57a29933536b6bb4\"\n      },\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"u\": [\n        \"0x010476f6a060453c0b1ad0b628f3e57c23039ee16eea5e71bb87c3b5419b1255dc0e5883322e563b84a29543823c0e86\",\n        \"0x0b1a912064fb0554b180e07af7e787f1f883a0470759c03c1b6509eb8ce980d1670305ae7b928226bb58fdc0a419f46e\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x082aabae8b7dedb0e78aeb619ad3bfd9277a2f77ba7fad20ef6aabdc6c31d19ba5a6d12283553294c1825c4b3ca2dcfe\",\n        \"y\": \"0x05b84ae5a942248eea39e1d91030458c40153f3b654ab7872d779ad1e942856a20c438e8d99bc8abfbf74729ce1f7ac8\"\n      },\n      \"Q0\": {\n        \"x\": \"0x0cf97e6dbd0947857f3e578231d07b309c622ade08f2c08b32ff372bd90db19467b2563cc997d4407968d4ac80e154f8\",\n        \"y\": \"0x127f0cddf2613058101a5701f4cb9d0861fd6c2a1b8e0afe194fccf586a3201a53874a2761a9ab6d7220c68661a35ab3\"\n      },\n      \"Q1\": {\n        \"x\": \"0x092f1acfa62b05f95884c6791fba989bbe58044ee6355d100973bf9553ade52b47929264e6ae770fb264582d8dce512a\",\n        \"y\": \"0x028e6d0169a72cfedb737be45db6c401d3adfb12c58c619c82b93a5dfcccef12290de530b0480575ddc8397cda0bbebf\"\n      },\n      \"msg\": 
\"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"u\": [\n        \"0x0a8ffa7447f6be1c5a2ea4b959c9454b431e29ccc0802bc052413a9c5b4f9aac67a93431bd480d15be1e057c8a08e8c6\",\n        \"0x05d487032f602c90fa7625dbafe0f4a49ef4a6b0b33d7bb349ff4cf5410d297fd6241876e3e77b651cfc8191e40a68b7\"\n      ]\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/BLS12381G2_XMD_SHA-256_SSWU_NU_.json",
    "content": "{\n  \"L\": \"0x40\",\n  \"Z\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9,0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa\",\n  \"ciphersuite\": \"BLS12381G2_XMD:SHA-256_SSWU_NU_\",\n  \"curve\": \"BLS12-381 G2\",\n  \"dst\": \"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_NU_\",\n  \"expand\": \"XMD\",\n  \"field\": {\n    \"m\": \"0x2\",\n    \"p\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab\"\n  },\n  \"hash\": \"sha256\",\n  \"k\": \"0x80\",\n  \"map\": {\n    \"name\": \"SSWU\"\n  },\n  \"randomOracle\": false,\n  \"vectors\": [\n    {\n      \"P\": {\n        \"x\": \"0x00e7f4568a82b4b7dc1f14c6aaa055edf51502319c723c4dc2688c7fe5944c213f510328082396515734b6612c4e7bb7,0x126b855e9e69b1f691f816e48ac6977664d24d99f8724868a184186469ddfd4617367e94527d4b74fc86413483afb35b\",\n        \"y\": \"0x0caead0fd7b6176c01436833c79d305c78be307da5f6af6c133c47311def6ff1e0babf57a0fb5539fce7ee12407b0a42,0x1498aadcf7ae2b345243e281ae076df6de84455d766ab6fcdaad71fab60abb2e8b980a440043cd305db09d283c895e3d\"\n      },\n      \"Q\": {\n        \"x\": \"0x18ed3794ad43c781816c523776188deafba67ab773189b8f18c49bc7aa841cd81525171f7a5203b2a340579192403bef,0x0727d90785d179e7b5732c8a34b660335fed03b913710b60903cf4954b651ed3466dc3728e21855ae822d4a0f1d06587\",\n        \"y\": \"0x00764a5cf6c5f61c52c838523460eb2168b5a5b43705e19cb612e006f29b717897facfd15dd1c8874c915f6d53d0342d,0x19290bb9797c12c1d275817aa2605ebe42275b66860f0e4d04487ebc2e47c50b36edd86c685a60c20a2bd584a82b011a\"\n      },\n      \"msg\": \"\",\n      \"u\": [\n        \"0x07355d25caf6e7f2f0cb2812ca0e513bd026ed09dda65b177500fa31714e09ea0ded3a078b526bed3307f804d4b93b04,0x02829ce3c021339ccb5caf3e187f6370e1e2a311dec9b75363117063ab2015603ff52c3d3b98f19c2f65575e99e8b78c\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": 
\"0x108ed59fd9fae381abfd1d6bce2fd2fa220990f0f837fa30e0f27914ed6e1454db0d1ee957b219f61da6ff8be0d6441f,0x0296238ea82c6d4adb3c838ee3cb2346049c90b96d602d7bb1b469b905c9228be25c627bffee872def773d5b2a2eb57d\",\n        \"y\": \"0x033f90f6057aadacae7963b0a0b379dd46750c1c94a6357c99b65f63b79e321ff50fe3053330911c56b6ceea08fee656,0x153606c417e59fb331b7ae6bce4fbf7c5190c33ce9402b5ebe2b70e44fca614f3f1382a3625ed5493843d0b0a652fc3f\"\n      },\n      \"Q\": {\n        \"x\": \"0x0f40e1d5025ecef0d850aa0bb7bbeceab21a3d4e85e6bee857805b09693051f5b25428c6be343edba5f14317fcc30143,0x02e0d261f2b9fee88b82804ec83db330caa75fbb12719cfa71ccce1c532dc4e1e79b0a6a281ed8d3817524286c8bc04c\",\n        \"y\": \"0x0cf4a4adc5c66da0bca4caddc6a57ecd97c8252d7526a8ff478e0dfed816c4d321b5c3039c6683ae9b1e6a3a38c9c0ae,0x11cad1646bb3768c04be2ab2bbe1f80263b7ff6f8f9488f5bc3b6850e5a3e97e20acc583613c69cf3d2bfe8489744ebb\"\n      },\n      \"msg\": \"abc\",\n      \"u\": [\n        \"0x138879a9559e24cecee8697b8b4ad32cced053138ab913b99872772dc753a2967ed50aabc907937aefb2439ba06cc50c,0x0a1ae7999ea9bab1dcc9ef8887a6cb6e8f1e22566015428d220b7eec90ffa70ad1f624018a9ad11e78d588bd3617f9f2\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x038af300ef34c7759a6caaa4e69363cafeed218a1f207e93b2c70d91a1263d375d6730bd6b6509dcac3ba5b567e85bf3,0x0da75be60fb6aa0e9e3143e40c42796edf15685cafe0279afd2a67c3dff1c82341f17effd402e4f1af240ea90f4b659b\",\n        \"y\": \"0x19b148cbdf163cf0894f29660d2e7bfb2b68e37d54cc83fd4e6e62c020eaa48709302ef8e746736c0e19342cc1ce3df4,0x0492f4fed741b073e5a82580f7c663f9b79e036b70ab3e51162359cec4e77c78086fe879b65ca7a47d34374c8315ac5e\"\n      },\n      \"Q\": {\n        \"x\": \"0x13a9d4a738a85c9f917c7be36b240915434b58679980010499b9ae8d7a1bf7fbe617a15b3cd6060093f40d18e0f19456,0x16fa88754e7670366a859d6f6899ad765bf5a177abedb2740aacc9252c43f90cd0421373fbd5b2b76bb8f5c4886b5d37\",\n        \"y\": 
\"0x0a7fa7d82c46797039398253e8765a4194100b330dfed6d7fbb46d6fbf01e222088779ac336e3675c7a7a0ee05bbb6e3,0x0c6ee170ab766d11fa9457cef53253f2628010b2cffc102b3b28351eb9df6c281d3cfc78e9934769d661b72a5265338d\"\n      },\n      \"msg\": \"abcdef0123456789\",\n      \"u\": [\n        \"0x18c16fe362b7dbdfa102e42bdfd3e2f4e6191d479437a59db4eb716986bf08ee1f42634db66bde97d6c16bbfd342b3b8,0x0e37812ce1b146d998d5f92bdd5ada2a31bfd63dfe18311aa91637b5f279dd045763166aa1615e46a50d8d8f475f184e\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x0c5ae723be00e6c3f0efe184fdc0702b64588fe77dda152ab13099a3bacd3876767fa7bbad6d6fd90b3642e902b208f9,0x12c8c05c1d5fc7bfa847f4d7d81e294e66b9a78bc9953990c358945e1f042eedafce608b67fdd3ab0cb2e6e263b9b1ad\",\n        \"y\": \"0x04e77ddb3ede41b5ec4396b7421dd916efc68a358a0d7425bddd253547f2fb4830522358491827265dfc5bcc1928a569,0x11c624c56dbe154d759d021eec60fab3d8b852395a89de497e48504366feedd4662d023af447d66926a28076813dd646\"\n      },\n      \"Q\": {\n        \"x\": \"0x0a08b2f639855dfdeaaed972702b109e2241a54de198b2b4cd12ad9f88fa419a6086a58d91fc805de812ea29bee427c2,0x04a7442e4cb8b42ef0f41dac9ee74e65ecad3ce0851f0746dc47568b0e7a8134121ed09ba054509232c49148aef62cda\",\n        \"y\": \"0x05d60b1f04212b2c87607458f71d770f43973511c260f0540eef3a565f42c7ce59aa1cea684bb2a7bcab84acd2f36c8c,0x1017aa5747ba15505ece266a86b0ca9c712f41a254b76ca04094ca442ce45ecd224bd5544cd16685d0d1b9d156dd0531\"\n      },\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"u\": [\n        \"0x08d4a0997b9d52fecf99427abb721f0fa779479963315fe21c6445250de7183e3f63bfdf86570da8929489e421d4ee95,0x16cb4ccad91ec95aab070f22043916cd6a59c4ca94097f7f510043d48515526dc8eaaea27e586f09151ae613688d5a89\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": 
\"0x0ea4e7c33d43e17cc516a72f76437c4bf81d8f4eac69ac355d3bf9b71b8138d55dc10fd458be115afa798b55dac34be1,0x1565c2f625032d232f13121d3cfb476f45275c303a037faa255f9da62000c2c864ea881e2bcddd111edc4a3c0da3e88d\",\n        \"y\": \"0x043b6f5fe4e52c839148dc66f2b3751e69a0f6ebb3d056d6465d50d4108543ecd956e10fa1640dfd9bc0030cc2558d28,0x0f8991d2a1ad662e7b6f58ab787947f1fa607fce12dde171bc17903b012091b657e15333e11701edcf5b63ba2a561247\"\n      },\n      \"Q\": {\n        \"x\": \"0x19592c812d5a50c5601062faba14c7d670711745311c879de1235a0a11c75aab61327bf2d1725db07ec4d6996a682886,0x0eef4fa41ddc17ed47baf447a2c498548f3c72a02381313d13bef916e240b61ce125539090d62d9fbb14a900bf1b8e90\",\n        \"y\": \"0x1260d6e0987eae96af9ebe551e08de22b37791d53f4db9e0d59da736e66699735793e853e26362531fe4adf99c1883e3,0x0dbace5df0a4ac4ac2f45d8fdf8aee45484576fdd6efc4f98ab9b9f4112309e628255e183022d98ea5ed6e47ca00306c\"\n      },\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"u\": [\n        \"0x03f80ce4ff0ca2f576d797a3660e3f65b274285c054feccc3215c879e2c0589d376e83ede13f93c32f05da0f68fd6a10,0x006488a837c5413746d868d1efb7232724da10eca410b07d8b505b9363bdccf0a1fc0029bad07d65b15ccfe6dd25e20d\"\n      ]\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/BLS12381G2_XMD_SHA-256_SSWU_RO_.json",
    "content": "{\n  \"L\": \"0x40\",\n  \"Z\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9,0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa\",\n  \"ciphersuite\": \"BLS12381G2_XMD:SHA-256_SSWU_RO_\",\n  \"curve\": \"BLS12-381 G2\",\n  \"dst\": \"QUUX-V01-CS02-with-BLS12381G2_XMD:SHA-256_SSWU_RO_\",\n  \"expand\": \"XMD\",\n  \"field\": {\n    \"m\": \"0x2\",\n    \"p\": \"0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab\"\n  },\n  \"hash\": \"sha256\",\n  \"k\": \"0x80\",\n  \"map\": {\n    \"name\": \"SSWU\"\n  },\n  \"randomOracle\": true,\n  \"vectors\": [\n    {\n      \"P\": {\n        \"x\": \"0x0141ebfbdca40eb85b87142e130ab689c673cf60f1a3e98d69335266f30d9b8d4ac44c1038e9dcdd5393faf5c41fb78a,0x05cb8437535e20ecffaef7752baddf98034139c38452458baeefab379ba13dff5bf5dd71b72418717047f5b0f37da03d\",\n        \"y\": \"0x0503921d7f6a12805e72940b963c0cf3471c7b2a524950ca195d11062ee75ec076daf2d4bc358c4b190c0c98064fdd92,0x12424ac32561493f3fe3c260708a12b7c620e7be00099a974e259ddc7d1f6395c3c811cdd19f1e8dbf3e9ecfdcbab8d6\"\n      },\n      \"Q0\": {\n        \"x\": \"0x019ad3fc9c72425a998d7ab1ea0e646a1f6093444fc6965f1cad5a3195a7b1e099c050d57f45e3fa191cc6d75ed7458c,0x171c88b0b0efb5eb2b88913a9e74fe111a4f68867b59db252ce5868af4d1254bfab77ebde5d61cd1a86fb2fe4a5a1c1d\",\n        \"y\": \"0x0ba10604e62bdd9eeeb4156652066167b72c8d743b050fb4c1016c31b505129374f76e03fa127d6a156213576910fef3,0x0eb22c7a543d3d376e9716a49b72e79a89c9bfe9feee8533ed931cbb5373dde1fbcd7411d8052e02693654f71e15410a\"\n      },\n      \"Q1\": {\n        \"x\": \"0x113d2b9cd4bd98aee53470b27abc658d91b47a78a51584f3d4b950677cfb8a3e99c24222c406128c91296ef6b45608be,0x13855912321c5cb793e9d1e88f6f8d342d49c0b0dbac613ee9e17e3c0b3c97dfbb5a49cc3fb45102fdbaf65e0efe2632\",\n        \"y\": 
\"0x0fd3def0b7574a1d801be44fde617162aa2e89da47f464317d9bb5abc3a7071763ce74180883ad7ad9a723a9afafcdca,0x056f617902b3c0d0f78a9a8cbda43a26b65f602f8786540b9469b060db7b38417915b413ca65f875c130bebfaa59790c\"\n      },\n      \"msg\": \"\",\n      \"u\": [\n        \"0x03dbc2cce174e91ba93cbb08f26b917f98194a2ea08d1cce75b2b9cc9f21689d80bd79b594a613d0a68eb807dfdc1cf8,0x05a2acec64114845711a54199ea339abd125ba38253b70a92c876df10598bd1986b739cad67961eb94f7076511b3b39a\",\n        \"0x02f99798e8a5acdeed60d7e18e9120521ba1f47ec090984662846bc825de191b5b7641148c0dbc237726a334473eee94,0x145a81e418d4010cc027a68f14391b30074e89e60ee7a22f87217b2f6eb0c4b94c9115b436e6fa4607e95a98de30a435\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x02c2d18e033b960562aae3cab37a27ce00d80ccd5ba4b7fe0e7a210245129dbec7780ccc7954725f4168aff2787776e6,0x139cddbccdc5e91b9623efd38c49f81a6f83f175e80b06fc374de9eb4b41dfe4ca3a230ed250fbe3a2acf73a41177fd8\",\n        \"y\": \"0x1787327b68159716a37440985269cf584bcb1e621d3a7202be6ea05c4cfe244aeb197642555a0645fb87bf7466b2ba48,0x00aa65dae3c8d732d10ecd2c50f8a1baf3001578f71c694e03866e9f3d49ac1e1ce70dd94a733534f106d4cec0eddd16\"\n      },\n      \"Q0\": {\n        \"x\": \"0x12b2e525281b5f4d2276954e84ac4f42cf4e13b6ac4228624e17760faf94ce5706d53f0ca1952f1c5ef75239aeed55ad,0x05d8a724db78e570e34100c0bc4a5fa84ad5839359b40398151f37cff5a51de945c563463c9efbdda569850ee5a53e77\",\n        \"y\": \"0x02eacdc556d0bdb5d18d22f23dcb086dd106cad713777c7e6407943edbe0b3d1efe391eedf11e977fac55f9b94f2489c,0x04bbe48bfd5814648d0b9e30f0717b34015d45a861425fabc1ee06fdfce36384ae2c808185e693ae97dcde118f34de41\"\n      },\n      \"Q1\": {\n        \"x\": \"0x19f18cc5ec0c2f055e47c802acc3b0e40c337256a208001dde14b25afced146f37ea3d3ce16834c78175b3ed61f3c537,0x15b0dadc256a258b4c68ea43605dffa6d312eef215c19e6474b3e101d33b661dfee43b51abbf96fee68fc6043ac56a58\",\n        \"y\": 
\"0x05e47c1781286e61c7ade887512bd9c2cb9f640d3be9cf87ea0bad24bd0ebfe946497b48a581ab6c7d4ca74b5147287f,0x19f98db2f4a1fcdf56a9ced7b320ea9deecf57c8e59236b0dc21f6ee7229aa9705ce9ac7fe7a31c72edca0d92370c096\"\n      },\n      \"msg\": \"abc\",\n      \"u\": [\n        \"0x15f7c0aa8f6b296ab5ff9c2c7581ade64f4ee6f1bf18f55179ff44a2cf355fa53dd2a2158c5ecb17d7c52f63e7195771,0x01c8067bf4c0ba709aa8b9abc3d1cef589a4758e09ef53732d670fd8739a7274e111ba2fcaa71b3d33df2a3a0c8529dd\",\n        \"0x187111d5e088b6b9acfdfad078c4dacf72dcd17ca17c82be35e79f8c372a693f60a033b461d81b025864a0ad051a06e4,0x08b852331c96ed983e497ebc6dee9b75e373d923b729194af8e72a051ea586f3538a6ebb1e80881a082fa2b24df9f566\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x121982811d2491fde9ba7ed31ef9ca474f0e1501297f68c298e9f4c0028add35aea8bb83d53c08cfc007c1e005723cd0,0x190d119345b94fbd15497bcba94ecf7db2cbfd1e1fe7da034d26cbba169fb3968288b3fafb265f9ebd380512a71c3f2c\",\n        \"y\": \"0x05571a0f8d3c08d094576981f4a3b8eda0a8e771fcdcc8ecceaf1356a6acf17574518acb506e435b639353c2e14827c8,0x0bb5e7572275c567462d91807de765611490205a941a5a6af3b1691bfe596c31225d3aabdf15faff860cb4ef17c7c3be\"\n      },\n      \"Q0\": {\n        \"x\": \"0x0f48f1ea1318ddb713697708f7327781fb39718971d72a9245b9731faaca4dbaa7cca433d6c434a820c28b18e20ea208,0x06051467c8f85da5ba2540974758f7a1e0239a5981de441fdd87680a995649c211054869c50edbac1f3a86c561ba3162\",\n        \"y\": \"0x168b3d6df80069dbbedb714d41b32961ad064c227355e1ce5fac8e105de5e49d77f0c64867f3834848f152497eb76333,0x134e0e8331cee8cb12f9c2d0742714ed9eee78a84d634c9a95f6a7391b37125ed48bfc6e90bf3546e99930ff67cc97bc\"\n      },\n      \"Q1\": {\n        \"x\": \"0x004fd03968cd1c99a0dd84551f44c206c84dcbdb78076c5bfee24e89a92c8508b52b88b68a92258403cbe1ea2da3495f,0x1674338ea298281b636b2eb0fe593008d03171195fd6dcd4531e8a1ed1f02a72da238a17a635de307d7d24aa2d969a47\",\n        \"y\": 
\"0x0dc7fa13fff6b12558419e0a1e94bfc3cfaf67238009991c5f24ee94b632c3d09e27eca329989aee348a67b50d5e236c,0x169585e164c131103d85324f2d7747b23b91d66ae5d947c449c8194a347969fc6bbd967729768da485ba71868df8aed2\"\n      },\n      \"msg\": \"abcdef0123456789\",\n      \"u\": [\n        \"0x0313d9325081b415bfd4e5364efaef392ecf69b087496973b229303e1816d2080971470f7da112c4eb43053130b785e1,0x062f84cb21ed89406890c051a0e8b9cf6c575cf6e8e18ecf63ba86826b0ae02548d83b483b79e48512b82a6c0686df8f\",\n        \"0x1739123845406baa7be5c5dc74492051b6d42504de008c635f3535bb831d478a341420e67dcc7b46b2e8cba5379cca97,0x01897665d9cb5db16a27657760bbea7951f67ad68f8d55f7113f24ba6ddd82caef240a9bfa627972279974894701d975\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x19a84dd7248a1066f737cc34502ee5555bd3c19f2ecdb3c7d9e24dc65d4e25e50d83f0f77105e955d78f4762d33c17da,0x0934aba516a52d8ae479939a91998299c76d39cc0c035cd18813bec433f587e2d7a4fef038260eef0cef4d02aae3eb91\",\n        \"y\": \"0x14f81cd421617428bc3b9fe25afbb751d934a00493524bc4e065635b0555084dd54679df1536101b2c979c0152d09192,0x09bcccfa036b4847c9950780733633f13619994394c23ff0b32fa6b795844f4a0673e20282d07bc69641cee04f5e5662\"\n      },\n      \"Q0\": {\n        \"x\": \"0x09eccbc53df677f0e5814e3f86e41e146422834854a224bf5a83a50e4cc0a77bfc56718e8166ad180f53526ea9194b57,0x0c3633943f91daee715277bd644fba585168a72f96ded64fc5a384cce4ec884a4c3c30f08e09cd2129335dc8f67840ec\",\n        \"y\": \"0x0eb6186a0457d5b12d132902d4468bfeb7315d83320b6c32f1c875f344efcba979952b4aa418589cb01af712f98cc555,0x119e3cf167e69eb16c1c7830e8df88856d48be12e3ff0a40791a5cd2f7221311d4bf13b1847f371f467357b3f3c0b4c7\"\n      },\n      \"Q1\": {\n        \"x\": \"0x0eb3aabc1ddfce17ff18455fcc7167d15ce6b60ddc9eb9b59f8d40ab49420d35558686293d046fc1e42f864b7f60e381,0x198bdfb19d7441ebcca61e8ff774b29d17da16547d2c10c273227a635cacea3f16826322ae85717630f0867539b5ed8b\",\n        \"y\": 
\"0x0aaf1dee3adf3ed4c80e481c09b57ea4c705e1b8d25b897f0ceeec3990748716575f92abff22a1c8f4582aff7b872d52,0x0d058d9061ed27d4259848a06c96c5ca68921a5d269b078650c882cb3c2bd424a8702b7a6ee4e0ead9982baf6843e924\"\n      },\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"u\": [\n        \"0x025820cefc7d06fd38de7d8e370e0da8a52498be9b53cba9927b2ef5c6de1e12e12f188bbc7bc923864883c57e49e253,0x034147b77ce337a52e5948f66db0bab47a8d038e712123bb381899b6ab5ad20f02805601e6104c29df18c254b8618c7b\",\n        \"0x0930315cae1f9a6017c3f0c8f2314baa130e1cf13f6532bff0a8a1790cd70af918088c3db94bda214e896e1543629795,0x10c4df2cacf67ea3cb3108b00d4cbd0b3968031ebc8eac4b1ebcefe84d6b715fde66bef0219951ece29d1facc8a520ef\"\n      ]\n    },\n    {\n      \"P\": {\n        \"x\": \"0x01a6ba2f9a11fa5598b2d8ace0fbe0a0eacb65deceb476fbbcb64fd24557c2f4b18ecfc5663e54ae16a84f5ab7f62534,0x11fca2ff525572795a801eed17eb12785887c7b63fb77a42be46ce4a34131d71f7a73e95fee3f812aea3de78b4d01569\",\n        \"y\": \"0x0b6798718c8aed24bc19cb27f866f1c9effcdbf92397ad6448b5c9db90d2b9da6cbabf48adc1adf59a1a28344e79d57e,0x03a47f8e6d1763ba0cad63d6114c0accbef65707825a511b251a660a9b3994249ae4e63fac38b23da0c398689ee2ab52\"\n      },\n      \"Q0\": {\n        \"x\": \"0x17cadf8d04a1a170f8347d42856526a24cc466cb2ddfd506cff01191666b7f944e31244d662c904de5440516a2b09004,0x0d13ba91f2a8b0051cf3279ea0ee63a9f19bc9cb8bfcc7d78b3cbd8cc4fc43ba726774b28038213acf2b0095391c523e\",\n        \"y\": \"0x17ef19497d6d9246fa94d35575c0f8d06ee02f21a284dbeaa78768cb1e25abd564e3381de87bda26acd04f41181610c5,0x12c3c913ba4ed03c24f0721a81a6be7430f2971ffca8fd1729aafe496bb725807531b44b34b59b3ae5495e5a2dcbd5c8\"\n      },\n      \"Q1\": {\n        \"x\": \"0x16ec57b7fe04c71dfe34fb5ad84dbce5a2dbbd6ee085f1d8cd17f45e8868976fc3c51ad9eeda682c7869024d24579bfd,0x13103f7aace1ae1420d208a537f7d3a9679c287208026e4e3439ab8cd534c12856284d95e27f5e1f33eec2ce656533b0\",\n      
  \"y\": \"0x0958b2c4c2c10fcef5a6c59b9e92c4a67b0fae3e2e0f1b6b5edad9c940b8f3524ba9ebbc3f2ceb3cfe377655b3163bd7,0x0ccb594ed8bd14ca64ed9cb4e0aba221be540f25dd0d6ba15a4a4be5d67bcf35df7853b2d8dad3ba245f1ea3697f66aa\"\n      },\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"u\": [\n        \"0x190b513da3e66fc9a3587b78c76d1d132b1152174d0b83e3c1114066392579a45824c5fa17649ab89299ddd4bda54935,0x12ab625b0fe0ebd1367fe9fac57bb1168891846039b4216b9d94007b674de2d79126870e88aeef54b2ec717a887dcf39\",\n        \"0x0e6a42010cf435fb5bacc156a585e1ea3294cc81d0ceb81924d95040298380b164f702275892cedd81b62de3aba3f6b5,0x117d9a0defc57a33ed208428cb84e54c85a6840e7648480ae428838989d25d97a0af8e3255be62b25c2a85630d2dddd8\"\n      ]\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/README",
    "content": "These files are downloaded from https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/tree/master/poc/vectors, commit 6d40f98.\n\nNote the file names cannot have \":\" in them as this is incompatible with Windows.\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/expand_message_xmd_SHA256_256.json",
    "content": "{\n  \"DST\": \"QUUX-V01-CS02-with-expander-SHA256-128-long-DST-1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111\",\n  \"hash\": \"SHA256\",\n  \"k\": 128,\n  \"name\": \"expand_message_xmd\",\n  \"tests\": [\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"e8dc0c8b686b7ef2074086fbdd2f30e3f8bfbd3bdf177f73f04b97ce618a3ed3\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"abc\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000616263002000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"52dbf4f36cf560fca57dedec2ad924ee9c266341d8f3d6afe5171733b16bbb12\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"abcdef0123456789\",\n      \"msg_prime\": \"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000061626364656630313233343536373839002000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"35387dcf22618f3728e6c686490f8b431f76550b0b2c61cbc1ce7001536f4521\"\n    },\n    {\n      \"DST_prime\": 
\"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000713132385f7171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171002000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"01b637612bb18e840028be900a833a74414140dde0c4754c198532c3a0ba42bc\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"msg_prime\": 
\"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000613531325f6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161002000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"20cce7033cabc5460743180be6fa8aac5a103f56d481cf369a8accc0c374431b\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": 
\"14604d85432c68b757e485c8894db3117992fc57e0e136f71ad987f789a0abc287c47876978e2388a02af86b1e8d1342e5ce4f7aaa07a87321e691f6fba7e0072eecc1218aebb89fb14a0662322d5edbd873f0eb35260145cd4e64f748c5dfe60567e126604bcab1a3ee2dc0778102ae8a5cfd1429ebc0fa6bf1a53c36f55dfc\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"abc\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000616263008000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"1a30a5e36fbdb87077552b9d18b9f0aee16e80181d5b951d0471d55b66684914aef87dbb3626eaabf5ded8cd0686567e503853e5c84c259ba0efc37f71c839da2129fe81afdaec7fbdc0ccd4c794727a17c0d20ff0ea55e1389d6982d1241cb8d165762dbc39fb0cee4474d2cbbd468a835ae5b2f20e4f959f56ab24cd6fe267\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"abcdef0123456789\",\n      \"msg_prime\": \"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000061626364656630313233343536373839008000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"d2ecef3635d2397f34a9f86438d772db19ffe9924e28a1caf6f1c8f15603d4028f40891044e5c7e39ebb9b31339979ff33a4249206f67d4a1e7c765410bcd249ad78d407e303675918f20f26ce6d7027ed3774512ef5b00d816e51bfcc96c3539601fa48ef1c07e494bdc37054ba96ecb9dbd666417e3de289d4f424f502a982\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      
\"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000713132385f7171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171008000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"ed6e8c036df90111410431431a232d41a32c86e296c05d426e5f44e75b9a50d335b2412bc6c91e0a6dc131de09c43110d9180d0a70f0d6289cb4e43b05f7ee5e9b3f42a1fad0f31bac6a625b3b5c50e3a83316783b649e5ecc9d3b1d9471cb5024b7ccf40d41d1751a04ca0356548bc6e703fca02ab521b505e8e45600508d32\"\n    },\n    {\n      \"DST_prime\": \"412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"msg_prime\": 
\"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000613531325f6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161008000412717974da474d0f8c420f320ff81e8432adb7c927d9bd082b4fb4d16c0a23620\",\n      \"uniform_bytes\": \"78b53f2413f3c688f07732c10e5ced29a17c6a16f717179ffbe38d92d6c9ec296502eb9889af83a1928cd162e845b0d3c5424e83280fed3d10cffb2f8431f14e7a23f4c68819d40617589e4c41169d0b56e0e3535be1fd71fbb08bb70c5b5ffed953d6c14bf7618b35fc1f4c4b30538236b4b08c9fbf90462447a8ada60be495\"\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/vectors/hash_to_curve/expand_message_xmd_SHA256_38.json",
    "content": "{\n  \"DST\": \"QUUX-V01-CS02-with-expander-SHA256-128\",\n  \"hash\": \"SHA256\",\n  \"k\": 128,\n  \"name\": \"expand_message_xmd\",\n  \"tests\": [\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"68a985b87eb6b46952128911f2a4412bbc302a9d759667f87f7a21d803f07235\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"abc\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000616263002000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"d8ccab23b5985ccea865c6c97b6e5b8350e794e603b4b97902f53a8a0d605615\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"abcdef0123456789\",\n      \"msg_prime\": \"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000061626364656630313233343536373839002000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"eff31487c770a893cfb36f912fbfcbff40d5661771ca4b2cb4eafe524333f5c1\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": 
\"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000713132385f7171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171002000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"b23a1d2b4d97b2ef7785562a7e8bac7eed54ed6e97e29aa51bfe3f12ddad1ff9\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x20\",\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"msg_prime\": 
\"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000613531325f6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161002000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"4623227bcc01293b8c130bf771da8c298dede7383243dc0993d2d94823958c4c\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": 
\"af84c27ccfd45d41914fdff5df25293e221afc53d8ad2ac06d5e3e29485dadbee0d121587713a3e0dd4d5e69e93eb7cd4f5df4cd103e188cf60cb02edc3edf18eda8576c412b18ffb658e3dd6ec849469b979d444cf7b26911a08e63cf31f9dcc541708d3491184472c2c29bb749d4286b004ceb5ee6b9a7fa5b646c993f0ced\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"abc\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000616263008000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"abba86a6129e366fc877aab32fc4ffc70120d8996c88aee2fe4b32d6c7b6437a647e6c3163d40b76a73cf6a5674ef1d890f95b664ee0afa5359a5c4e07985635bbecbac65d747d3d2da7ec2b8221b17b0ca9dc8a1ac1c07ea6a1e60583e2cb00058e77b7b72a298425cd1b941ad4ec65e8afc50303a22c0f99b0509b4c895f40\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"abcdef0123456789\",\n      \"msg_prime\": \"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000061626364656630313233343536373839008000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"ef904a29bffc4cf9ee82832451c946ac3c8f8058ae97d8d629831a74c6572bd9ebd0df635cd1f208e2038e760c4994984ce73f0d55ea9f22af83ba4734569d4bc95e18350f740c07eef653cbb9f87910d833751825f0ebefa1abe5420bb52be14cf489b37fe1a72f7de2d10be453b2c9d9eb20c7e3f6edc5a60629178d9478df\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": 
\"q128_qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\",\n      \"msg_prime\": \"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000713132385f7171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171717171008000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"80be107d0884f0d881bb460322f0443d38bd222db8bd0b0a5312a6fedb49c1bbd88fd75d8b9a09486c60123dfa1d73c1cc3169761b17476d3c6b7cbbd727acd0e2c942f4dd96ae3da5de368d26b32286e32de7e5a8cb2949f866a0b80c58116b29fa7fabb3ea7d520ee603e0c25bcaf0b9a5e92ec6a1fe4e0391d1cdbce8c68a\"\n    },\n    {\n      \"DST_prime\": \"515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"len_in_bytes\": \"0x80\",\n      \"msg\": \"a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",\n      \"msg_prime\": 
\"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000613531325f6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161008000515555582d5630312d435330322d776974682d657870616e6465722d5348413235362d31323826\",\n      \"uniform_bytes\": \"546aff5444b5b79aa6148bd81728704c32decb73a3ba76e9e75885cad9def1d06d6792f8a7d12794e90efed817d96920d728896a4510864370c207f99bd4a608ea121700ef01ed879745ee3e4ceef777eda6d9e5e38b90c86ea6fb0b36504ba4a45d22e86f6db5dd43d98a294bebb9125d5b794e9d2a81181066eb954966a487\"\n    }\n  ]\n}\n"
  },
  {
    "path": "bindings/zig/README.md",
    "content": "# blst for [Zig](https://ziglang.org/)\n\nThe object-oriented interface is modeled after the [C++ interface](../blst.hpp), but at the time of writing is a subset of it, sufficient to produce and verify individual and aggregated signatures. See [tests.zig](tests.zig) for an example. C symbols are available with the `blst.c.` prefix instead of `blst_`, e.g. `blst_miller_loop` is accessible as `blst.c.miller_loop`.\n\n## Adding dependency to your project\n\nExecute\n```\nzig fetch --save git+https://github.com/supranational/blst\n```\nand add an equivalent of the following line to your build.zig prior to `b.installArtifact(exe)`:\n```\nexe.root_module.addImport(\"blst\", b.dependency(\"blst\", .{}).module(\"blst\"));\n```\nYou should now be able to `@import(\"blst\")` in your application code. The above-mentioned fetch command can be used to update the git reference.\n"
  },
  {
    "path": "bindings/zig/blst.zig",
    "content": "//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n// DO NOT EDIT THIS FILE!!!\n// The file is auto-generated by generate.py\n//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n// Copyright Supranational LLC\n// SPDX-License-Identifier: Apache-2.0\n\nconst std = @import(\"std\");\n\npub const c = @import(\"c.zig\");\n\npub const Error = error{\n    BAD_ENCODING,\n    POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL,\n    PK_IS_INFINITY,\n    BAD_SCALAR,\n    Unknown,\n};\n\npub const ERROR = enum(c.ERROR) {\n    SUCCESS            = c.SUCCESS,\n    BAD_ENCODING       = c.BAD_ENCODING,\n    POINT_NOT_ON_CURVE = c.POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP = c.POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH = c.AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL        = c.VERIFY_FAIL,\n    PK_IS_INFINITY     = c.PK_IS_INFINITY,\n    BAD_SCALAR         = c.BAD_SCALAR,\n\n    pub fn as_error(self: ERROR) Error {\n        return switch (self) {\n            .BAD_ENCODING       => Error.BAD_ENCODING,\n            .POINT_NOT_ON_CURVE => Error.POINT_NOT_ON_CURVE,\n            .POINT_NOT_IN_GROUP => Error.POINT_NOT_IN_GROUP,\n            .AGGR_TYPE_MISMATCH => Error.AGGR_TYPE_MISMATCH,\n            .VERIFY_FAIL        => Error.VERIFY_FAIL,\n            .PK_IS_INFINITY     => Error.PK_IS_INFINITY,\n            .BAD_SCALAR         => Error.BAD_SCALAR,\n            else                => Error.Unknown,\n        };\n    }\n};\n\npub const SecretKey = struct {\n    key: c.scalar = c.scalar{},\n\n    pub fn keygen(self: *SecretKey, IKM: []const u8, info: ?[]const u8) void {\n        const opt = info orelse &[_]u8{};\n        c.keygen(&self.key, @ptrCast(IKM), IKM.len,\n                            @ptrCast(opt), opt.len);\n    }\n\n    pub fn deinit(self: *SecretKey) void {\n        self.key = c.scalar{};\n    }\n};\n\npub const PT = c.fp12;\n\npub const Pairing = struct {\n    ctx: []u64 = &[_]u64{},\n    allocator: 
std.mem.Allocator,\n\n    pub fn init(hash_or_encode: bool, DST: []const u8,\n                allocator: std.mem.Allocator) !Pairing {\n        const nlimbs = (c.pairing_sizeof() + @sizeOf(u64) - 1) / @sizeOf(u64);\n        const buffer = try allocator.alloc(u64, nlimbs);\n\n        c.pairing_init(@ptrCast(buffer), hash_or_encode, &DST[0], DST.len);\n\n        return Pairing{\n            .ctx = buffer,\n            .allocator = allocator,\n        };\n    }\n\n    pub fn deinit(self: *Pairing) void {\n        self.allocator.free(self.ctx);\n        self.ctx = &[_]u64{};\n    }\n\n    pub fn aggregate(self: *Pairing, pk: anytype, sig: anytype,\n                     msg: []const u8, aug: ?[]const u8) ERROR {\n        const opt = aug orelse &[_]u8{};\n        var err: c.ERROR = undefined;\n\n        switch (@TypeOf(pk)) {\n            *const P1_Affine, *P1_Affine => {\n                const sigp: [*c]const c.p2_affine = switch (@TypeOf(sig)) {\n                    @TypeOf(null) => null,\n                    else => &sig.point,\n                };\n                err = c.pairing_aggregate_pk_in_g1(@ptrCast(self.ctx),\n                                                   &pk.point, sigp,\n                                                   @ptrCast(msg), msg.len,\n                                                   @ptrCast(opt), opt.len);\n            },\n            *const P2_Affine, *P2_Affine => {\n                const sigp: [*c]const c.p1_affine = switch (@TypeOf(sig)) {\n                    @TypeOf(null) => null,\n                    else => &sig.point,\n                };\n                err = c.pairing_aggregate_pk_in_g2(@ptrCast(self.ctx),\n                                                   &pk.point, sigp,\n                                                   @ptrCast(msg), msg.len,\n                                                   @ptrCast(opt), opt.len);\n            },\n            else => |T| @compileError(\"expected type '*const blst.P1_Affine' \"\n       
                               ++ \"or '*const blst.P2_Affine', found '\"\n                                      ++ @typeName(T) ++ \"'\"),\n        }\n\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn commit(self: *Pairing) void {\n        c.pairing_commit(@ptrCast(self.ctx));\n    }\n\n    pub fn merge(self: *Pairing, second: *const Pairing) ERROR {\n        return c.pairing_merge(@ptrCast(self.ctx), @ptrCast(second.ctx));\n    }\n\n    pub fn finalverify(self: *Pairing, optional: ?*const PT) bool {\n        return c.pairing_finalverify(@ptrCast(self.ctx), optional);\n    }\n\n    pub fn raw_aggregate(self: *Pairing, q: *const P2_Affine,\n                                         p: *const P1_Affine) void {\n        c.pairing_raw_aggregate(@ptrCast(self.ctx), &q.point, &p.point);\n    }\n\n    pub fn as_fp12(self: *Pairing) *const PT {\n        return c.pairing_as_fp12(@ptrCast(self.ctx));\n    }\n};\n\npub const Uniq = struct {\n    tree: []u64 = &[_]u64{},\n    allocator: std.mem.Allocator,\n\n    pub fn init(n: usize, allocator: std.mem.Allocator) !Uniq {\n        const nlimbs = (c.uniq_sizeof(n) + @sizeOf(u64) - 1) / @sizeOf(u64);\n        const buffer = try allocator.alloc(u64, nlimbs);\n\n        c.uniq_init(@ptrCast(buffer));\n\n        return Uniq{\n            .tree = buffer,\n            .allocator = allocator,\n        };\n    }\n\n    pub fn deinit(self: *Uniq) void {\n        self.allocator.free(self.tree);\n        self.tree = &[_]u64{};\n    }\n\n    pub fn is_uniq(self: *Uniq, msg: []const u8) bool {\n        return c.uniq_test(@ptrCast(self.tree), @ptrCast(msg), msg.len);\n    }\n};\n\nconst FP_BYTES = 384/8;\npub const P1_COMPRESS_BYTES  = FP_BYTES;\npub const P1_SERIALIZE_BYTES = FP_BYTES*2;\npub const P2_COMPRESS_BYTES  = FP_BYTES*2;\npub const P2_SERIALIZE_BYTES = FP_BYTES*4;\n\n\npub const P1_Affine = struct {\n    point: c.p1_affine = c.p1_affine{},\n\n    pub fn from(in: anytype) !P1_Affine {\n        switch (@TypeOf(in)) 
{\n            *const P1,\n            *P1  => return in.to_affine(),\n            P1   => @compileError(\"expected type '*const blst.P1', found 'blst.P1'\"),\n            else => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P1_Affine = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn deserialize(self: *P1_Affine, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P1_COMPRESS_BYTES\n                                    else                 P1_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p1_deserialize(&self.point, &in[0]);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn serialize(self: *const P1_Affine) [P1_SERIALIZE_BYTES]u8 {\n        var ret: [P1_SERIALIZE_BYTES]u8 = undefined;\n        c.p1_affine_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P1_Affine) [P1_COMPRESS_BYTES]u8 {\n        var ret: [P1_COMPRESS_BYTES]u8 = undefined;\n        c.p1_affine_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn dup(self: *const P1_Affine) P1_Affine {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P1_Affine) bool {\n        return c.p1_affine_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P1_Affine) bool {\n        return c.p1_affine_in_g1(&self.point);\n    }\n\n    pub fn is_inf(self: *const P1_Affine) bool {\n        return c.p1_affine_is_inf(&self.point);\n    }\n\n    pub fn 
is_equal(self: *const P1_Affine, p: *const P1_Affine) bool {\n        return c.p1_affine_is_equal(&self.point, &p.point);\n    }\n\n    pub fn core_verify(self: *const P1_Affine, pk: *const P2_Affine,\n                       hash_or_encode: bool, msg: []const u8, DST: []const u8,\n                       aug: ?[]const u8) ERROR {\n        const opt = aug orelse &[_]u8{};\n        const err = c.core_verify_pk_in_g2(&pk.point, &self.point,\n                                           hash_or_encode,\n                                           @ptrCast(msg), msg.len,\n                                           @ptrCast(DST), DST.len,\n                                           @ptrCast(opt), opt.len);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn generator() P1_Affine {\n        return P1_Affine{\n            .point = c.p1_affine_generator().*,\n        };\n    }\n\n    pub fn to_jacobian(self: *const P1_Affine) P1 {\n        var ret: P1 = undefined;\n        c.p1_from_affine(&ret.point, &self.point);\n        return ret;\n    }\n};\n\npub const P1 = struct {\n    point: c.p1 = c.p1{},\n\n    pub fn from(in: anytype) !P1 {\n        switch (@TypeOf(in)) {\n            *const SecretKey,\n            *SecretKey  => return P1.public_key(in),\n            SecretKey   => @compileError(\"expected type '*const blst.SecretKey', found 'blst.SecretKey'\"),\n            *const P1_Affine,\n            *P1_Affine  => return in.to_jacobian(),\n            P1_Affine   => @compileError(\"expected type '*const blst.P1_Affine', found 'blst.P1_Affine'\"),\n            else        => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P1 = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) 
ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn deserialize(self: *P1, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P1_COMPRESS_BYTES\n                                    else                 P1_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p1_deserialize(@ptrCast(&self.point), &in[0]);\n        if (err == c.SUCCESS) {\n            c.p1_from_affine(&self.point, @ptrCast(&self.point));\n        }\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn serialize(self: *const P1) [P1_SERIALIZE_BYTES]u8 {\n        var ret: [P1_SERIALIZE_BYTES]u8 = undefined;\n        c.p1_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P1) [P1_COMPRESS_BYTES]u8 {\n        var ret: [P1_COMPRESS_BYTES]u8 = undefined;\n        c.p1_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn public_key(sk: *const SecretKey) P1 {\n        var ret: P1 = undefined;\n        c.sk_to_pk_in_g1(&ret.point, &sk.key);\n        return ret;\n    }\n\n    pub fn dup(self: *const P1) P1 {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P1) bool {\n        return c.p1_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P1) bool {\n        return c.p1_in_g1(&self.point);\n    }\n\n    pub fn is_inf(self: *const P1) bool {\n        return c.p1_is_inf(&self.point);\n    }\n\n    pub fn is_equal(self: *const P1, p: *const P1) bool {\n        return c.p1_is_equal(&self.point, &p.point);\n    }\n\n    pub fn aggregate(self: *P1, p: *const P1_Affine) !void {\n        if (!c.p1_affine_in_g1(&p.point)) {\n            return Error.POINT_NOT_IN_GROUP;\n        }\n        c.p1_add_or_double_affine(&self.point, &self.point, &p.point);\n    }\n\n    pub fn hash_to(msg: []const u8, 
DST: []const u8, aug: ?[]const u8) P1 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P1 = undefined;\n\n        c.hash_to_g1(&ret.point, @ptrCast(msg), msg.len,\n                                 @ptrCast(DST), DST.len,\n                                 @ptrCast(opt), opt.len);\n        return ret;\n    }\n\n    pub fn encode_to(msg: []const u8, DST: []const u8, aug: ?[]const u8) P1 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P1 = undefined;\n\n        c.encode_to_g1(&ret.point, @ptrCast(msg), msg.len,\n                                   @ptrCast(DST), DST.len,\n                                   @ptrCast(opt), opt.len);\n        return ret;\n    }\n\n    pub fn sign_with(self: *const P1, sk: *const SecretKey) *P1 {\n        c.sign_pk_in_g2(@constCast(&self.point), &self.point, &sk.key);\n        return @constCast(self);\n    }\n\n    pub fn to_affine(self: *const P1) P1_Affine {\n        var ret: P1_Affine = undefined;\n        c.p1_to_affine(&ret.point, &self.point);\n        return ret;\n    }\n\n    pub fn generator() P1 {\n        return P1{\n            .point = c.p1_generator().*,\n        };\n    }\n};\n\n\npub const P2_Affine = struct {\n    point: c.p2_affine = c.p2_affine{},\n\n    pub fn from(in: anytype) !P2_Affine {\n        switch (@TypeOf(in)) {\n            *const P2,\n            *P2  => return in.to_affine(),\n            P2   => @compileError(\"expected type '*const blst.P2', found 'blst.P2'\"),\n            else => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P2_Affine = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn 
deserialize(self: *P2_Affine, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P2_COMPRESS_BYTES\n                                    else                 P2_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p2_deserialize(&self.point, &in[0]);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn serialize(self: *const P2_Affine) [P2_SERIALIZE_BYTES]u8 {\n        var ret: [P2_SERIALIZE_BYTES]u8 = undefined;\n        c.p2_affine_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P2_Affine) [P2_COMPRESS_BYTES]u8 {\n        var ret: [P2_COMPRESS_BYTES]u8 = undefined;\n        c.p2_affine_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn dup(self: *const P2_Affine) P2_Affine {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P2_Affine) bool {\n        return c.p2_affine_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P2_Affine) bool {\n        return c.p2_affine_in_g2(&self.point);\n    }\n\n    pub fn is_inf(self: *const P2_Affine) bool {\n        return c.p2_affine_is_inf(&self.point);\n    }\n\n    pub fn is_equal(self: *const P2_Affine, p: *const P2_Affine) bool {\n        return c.p2_affine_is_equal(&self.point, &p.point);\n    }\n\n    pub fn core_verify(self: *const P2_Affine, pk: *const P1_Affine,\n                       hash_or_encode: bool, msg: []const u8, DST: []const u8,\n                       aug: ?[]const u8) ERROR {\n        const opt = aug orelse &[_]u8{};\n        const err = c.core_verify_pk_in_g1(&pk.point, &self.point,\n                                           hash_or_encode,\n                                           @ptrCast(msg), msg.len,\n                                           @ptrCast(DST), DST.len,\n                                          
 @ptrCast(opt), opt.len);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn generator() P2_Affine {\n        return P2_Affine{\n            .point = c.p2_affine_generator().*,\n        };\n    }\n\n    pub fn to_jacobian(self: *const P2_Affine) P2 {\n        var ret: P2 = undefined;\n        c.p2_from_affine(&ret.point, &self.point);\n        return ret;\n    }\n};\n\npub const P2 = struct {\n    point: c.p2 = c.p2{},\n\n    pub fn from(in: anytype) !P2 {\n        switch (@TypeOf(in)) {\n            *const SecretKey,\n            *SecretKey  => return P2.public_key(in),\n            SecretKey   => @compileError(\"expected type '*const blst.SecretKey', found 'blst.SecretKey'\"),\n            *const P2_Affine,\n            *P2_Affine  => return in.to_jacobian(),\n            P2_Affine   => @compileError(\"expected type '*const blst.P2_Affine', found 'blst.P2_Affine'\"),\n            else        => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P2 = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn deserialize(self: *P2, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P2_COMPRESS_BYTES\n                                    else                 P2_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p2_deserialize(@ptrCast(&self.point), &in[0]);\n        if (err == c.SUCCESS) {\n            c.p2_from_affine(&self.point, @ptrCast(&self.point));\n        }\n        return @as(ERROR, @enumFromInt(err));\n  
  }\n\n    pub fn serialize(self: *const P2) [P2_SERIALIZE_BYTES]u8 {\n        var ret: [P2_SERIALIZE_BYTES]u8 = undefined;\n        c.p2_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P2) [P2_COMPRESS_BYTES]u8 {\n        var ret: [P2_COMPRESS_BYTES]u8 = undefined;\n        c.p2_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn public_key(sk: *const SecretKey) P2 {\n        var ret: P2 = undefined;\n        c.sk_to_pk_in_g2(&ret.point, &sk.key);\n        return ret;\n    }\n\n    pub fn dup(self: *const P2) P2 {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P2) bool {\n        return c.p2_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P2) bool {\n        return c.p2_in_g2(&self.point);\n    }\n\n    pub fn is_inf(self: *const P2) bool {\n        return c.p2_is_inf(&self.point);\n    }\n\n    pub fn is_equal(self: *const P2, p: *const P2) bool {\n        return c.p2_is_equal(&self.point, &p.point);\n    }\n\n    pub fn aggregate(self: *P2, p: *const P2_Affine) !void {\n        if (!c.p2_affine_in_g2(&p.point)) {\n            return Error.POINT_NOT_IN_GROUP;\n        }\n        c.p2_add_or_double_affine(&self.point, &self.point, &p.point);\n    }\n\n    pub fn hash_to(msg: []const u8, DST: []const u8, aug: ?[]const u8) P2 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P2 = undefined;\n\n        c.hash_to_g2(&ret.point, @ptrCast(msg), msg.len,\n                                 @ptrCast(DST), DST.len,\n                                 @ptrCast(opt), opt.len);\n        return ret;\n    }\n\n    pub fn encode_to(msg: []const u8, DST: []const u8, aug: ?[]const u8) P2 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P2 = undefined;\n\n        c.encode_to_g2(&ret.point, @ptrCast(msg), msg.len,\n                                   @ptrCast(DST), DST.len,\n                                   @ptrCast(opt), opt.len);\n        return ret;\n 
   }\n\n    pub fn sign_with(self: *const P2, sk: *const SecretKey) *P2 {\n        c.sign_pk_in_g1(@constCast(&self.point), &self.point, &sk.key);\n        return @constCast(self);\n    }\n\n    pub fn to_affine(self: *const P2) P2_Affine {\n        var ret: P2_Affine = undefined;\n        c.p2_to_affine(&ret.point, &self.point);\n        return ret;\n    }\n\n    pub fn generator() P2 {\n        return P2{\n            .point = c.p2_generator().*,\n        };\n    }\n};\n\n"
  },
  {
    "path": "bindings/zig/c.zig",
    "content": "// automatically generated with 'zig translate-c'\nconst BLST_SUCCESS: c_int = 0;\nconst BLST_BAD_ENCODING: c_int = 1;\nconst BLST_POINT_NOT_ON_CURVE: c_int = 2;\nconst BLST_POINT_NOT_IN_GROUP: c_int = 3;\nconst BLST_AGGR_TYPE_MISMATCH: c_int = 4;\nconst BLST_VERIFY_FAIL: c_int = 5;\nconst BLST_PK_IS_INFINITY: c_int = 6;\nconst BLST_BAD_SCALAR: c_int = 7;\nconst BLST_ERROR = c_uint;\npub const byte = u8;\npub const limb_t = u64;\nconst blst_scalar = extern struct {\n    b: [32]byte = @import(\"std\").mem.zeroes([32]byte),\n};\nconst blst_fr = extern struct {\n    l: [4]limb_t = @import(\"std\").mem.zeroes([4]limb_t),\n};\nconst blst_fp = extern struct {\n    l: [6]limb_t = @import(\"std\").mem.zeroes([6]limb_t),\n};\nconst blst_fp2 = extern struct {\n    fp: [2]blst_fp = @import(\"std\").mem.zeroes([2]blst_fp),\n};\nconst blst_fp6 = extern struct {\n    fp2: [3]blst_fp2 = @import(\"std\").mem.zeroes([3]blst_fp2),\n};\nconst blst_fp12 = extern struct {\n    fp6: [2]blst_fp6 = @import(\"std\").mem.zeroes([2]blst_fp6),\n};\nextern fn blst_scalar_from_uint32(out: [*c]blst_scalar, a: [*c]const u32) void;\nextern fn blst_uint32_from_scalar(out: [*c]u32, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_from_uint64(out: [*c]blst_scalar, a: [*c]const u64) void;\nextern fn blst_uint64_from_scalar(out: [*c]u64, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_from_bendian(out: [*c]blst_scalar, a: [*c]const byte) void;\nextern fn blst_bendian_from_scalar(out: [*c]byte, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_from_lendian(out: [*c]blst_scalar, a: [*c]const byte) void;\nextern fn blst_lendian_from_scalar(out: [*c]byte, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_fr_check(a: [*c]const blst_scalar) bool;\nextern fn blst_sk_check(a: [*c]const blst_scalar) bool;\nextern fn blst_sk_add_n_check(out: [*c]blst_scalar, a: [*c]const blst_scalar, b: [*c]const blst_scalar) bool;\nextern fn blst_sk_sub_n_check(out: [*c]blst_scalar, a: 
[*c]const blst_scalar, b: [*c]const blst_scalar) bool;\nextern fn blst_sk_mul_n_check(out: [*c]blst_scalar, a: [*c]const blst_scalar, b: [*c]const blst_scalar) bool;\nextern fn blst_sk_inverse(out: [*c]blst_scalar, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_from_le_bytes(out: [*c]blst_scalar, in: [*c]const byte, len: usize) bool;\nextern fn blst_scalar_from_be_bytes(out: [*c]blst_scalar, in: [*c]const byte, len: usize) bool;\nextern fn blst_fr_add(ret: [*c]blst_fr, a: [*c]const blst_fr, b: [*c]const blst_fr) void;\nextern fn blst_fr_sub(ret: [*c]blst_fr, a: [*c]const blst_fr, b: [*c]const blst_fr) void;\nextern fn blst_fr_mul_by_3(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fr_lshift(ret: [*c]blst_fr, a: [*c]const blst_fr, count: usize) void;\nextern fn blst_fr_rshift(ret: [*c]blst_fr, a: [*c]const blst_fr, count: usize) void;\nextern fn blst_fr_mul(ret: [*c]blst_fr, a: [*c]const blst_fr, b: [*c]const blst_fr) void;\nextern fn blst_fr_sqr(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fr_cneg(ret: [*c]blst_fr, a: [*c]const blst_fr, flag: bool) void;\nextern fn blst_fr_eucl_inverse(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fr_inverse(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fr_from_uint64(ret: [*c]blst_fr, a: [*c]const u64) void;\nextern fn blst_uint64_from_fr(ret: [*c]u64, a: [*c]const blst_fr) void;\nextern fn blst_fr_from_scalar(ret: [*c]blst_fr, a: [*c]const blst_scalar) void;\nextern fn blst_scalar_from_fr(ret: [*c]blst_scalar, a: [*c]const blst_fr) void;\nextern fn blst_fp_add(ret: [*c]blst_fp, a: [*c]const blst_fp, b: [*c]const blst_fp) void;\nextern fn blst_fp_sub(ret: [*c]blst_fp, a: [*c]const blst_fp, b: [*c]const blst_fp) void;\nextern fn blst_fp_mul_by_3(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_mul_by_8(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_lshift(ret: [*c]blst_fp, a: [*c]const blst_fp, count: usize) void;\nextern fn 
blst_fp_mul(ret: [*c]blst_fp, a: [*c]const blst_fp, b: [*c]const blst_fp) void;\nextern fn blst_fp_sqr(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_cneg(ret: [*c]blst_fp, a: [*c]const blst_fp, flag: bool) void;\nextern fn blst_fp_eucl_inverse(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_inverse(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_sqrt(ret: [*c]blst_fp, a: [*c]const blst_fp) bool;\nextern fn blst_fp_from_uint32(ret: [*c]blst_fp, a: [*c]const u32) void;\nextern fn blst_uint32_from_fp(ret: [*c]u32, a: [*c]const blst_fp) void;\nextern fn blst_fp_from_uint64(ret: [*c]blst_fp, a: [*c]const u64) void;\nextern fn blst_uint64_from_fp(ret: [*c]u64, a: [*c]const blst_fp) void;\nextern fn blst_fp_from_bendian(ret: [*c]blst_fp, a: [*c]const byte) void;\nextern fn blst_bendian_from_fp(ret: [*c]byte, a: [*c]const blst_fp) void;\nextern fn blst_fp_from_lendian(ret: [*c]blst_fp, a: [*c]const byte) void;\nextern fn blst_lendian_from_fp(ret: [*c]byte, a: [*c]const blst_fp) void;\nextern fn blst_fp2_add(ret: [*c]blst_fp2, a: [*c]const blst_fp2, b: [*c]const blst_fp2) void;\nextern fn blst_fp2_sub(ret: [*c]blst_fp2, a: [*c]const blst_fp2, b: [*c]const blst_fp2) void;\nextern fn blst_fp2_mul_by_3(ret: [*c]blst_fp2, a: [*c]const blst_fp2) void;\nextern fn blst_fp2_mul_by_8(ret: [*c]blst_fp2, a: [*c]const blst_fp2) void;\nextern fn blst_fp2_lshift(ret: [*c]blst_fp2, a: [*c]const blst_fp2, count: usize) void;\nextern fn blst_fp2_mul(ret: [*c]blst_fp2, a: [*c]const blst_fp2, b: [*c]const blst_fp2) void;\nextern fn blst_fp2_sqr(ret: [*c]blst_fp2, a: [*c]const blst_fp2) void;\nextern fn blst_fp2_cneg(ret: [*c]blst_fp2, a: [*c]const blst_fp2, flag: bool) void;\nextern fn blst_fp2_eucl_inverse(ret: [*c]blst_fp2, a: [*c]const blst_fp2) void;\nextern fn blst_fp2_inverse(ret: [*c]blst_fp2, a: [*c]const blst_fp2) void;\nextern fn blst_fp2_sqrt(ret: [*c]blst_fp2, a: [*c]const blst_fp2) bool;\nextern fn blst_fp12_sqr(ret: 
[*c]blst_fp12, a: [*c]const blst_fp12) void;\nextern fn blst_fp12_cyclotomic_sqr(ret: [*c]blst_fp12, a: [*c]const blst_fp12) void;\nextern fn blst_fp12_mul(ret: [*c]blst_fp12, a: [*c]const blst_fp12, b: [*c]const blst_fp12) void;\nextern fn blst_fp12_mul_by_xy00z0(ret: [*c]blst_fp12, a: [*c]const blst_fp12, xy00z0: [*c]const blst_fp6) void;\nextern fn blst_fp12_conjugate(a: [*c]blst_fp12) void;\nextern fn blst_fp12_inverse(ret: [*c]blst_fp12, a: [*c]const blst_fp12) void;\nextern fn blst_fp12_frobenius_map(ret: [*c]blst_fp12, a: [*c]const blst_fp12, n: usize) void;\nextern fn blst_fp12_is_equal(a: [*c]const blst_fp12, b: [*c]const blst_fp12) bool;\nextern fn blst_fp12_is_one(a: [*c]const blst_fp12) bool;\nextern fn blst_fp12_in_group(a: [*c]const blst_fp12) bool;\nextern fn blst_fp12_one() [*c]const blst_fp12;\nconst blst_p1 = extern struct {\n    x: blst_fp = @import(\"std\").mem.zeroes(blst_fp),\n    y: blst_fp = @import(\"std\").mem.zeroes(blst_fp),\n    z: blst_fp = @import(\"std\").mem.zeroes(blst_fp),\n};\nconst blst_p1_affine = extern struct {\n    x: blst_fp = @import(\"std\").mem.zeroes(blst_fp),\n    y: blst_fp = @import(\"std\").mem.zeroes(blst_fp),\n};\nextern fn blst_p1_add(out: [*c]blst_p1, a: [*c]const blst_p1, b: [*c]const blst_p1) void;\nextern fn blst_p1_add_or_double(out: [*c]blst_p1, a: [*c]const blst_p1, b: [*c]const blst_p1) void;\nextern fn blst_p1_add_affine(out: [*c]blst_p1, a: [*c]const blst_p1, b: [*c]const blst_p1_affine) void;\nextern fn blst_p1_add_or_double_affine(out: [*c]blst_p1, a: [*c]const blst_p1, b: [*c]const blst_p1_affine) void;\nextern fn blst_p1_double(out: [*c]blst_p1, a: [*c]const blst_p1) void;\nextern fn blst_p1_mult(out: [*c]blst_p1, p: [*c]const blst_p1, scalar: [*c]const byte, nbits: usize) void;\nextern fn blst_p1_cneg(p: [*c]blst_p1, cbit: bool) void;\nextern fn blst_p1_to_affine(out: [*c]blst_p1_affine, in: [*c]const blst_p1) void;\nextern fn blst_p1_from_affine(out: [*c]blst_p1, in: [*c]const blst_p1_affine) 
void;\nextern fn blst_p1_on_curve(p: [*c]const blst_p1) bool;\nextern fn blst_p1_in_g1(p: [*c]const blst_p1) bool;\nextern fn blst_p1_is_equal(a: [*c]const blst_p1, b: [*c]const blst_p1) bool;\nextern fn blst_p1_is_inf(a: [*c]const blst_p1) bool;\nextern fn blst_p1_generator() [*c]const blst_p1;\nextern fn blst_p1_affine_on_curve(p: [*c]const blst_p1_affine) bool;\nextern fn blst_p1_affine_in_g1(p: [*c]const blst_p1_affine) bool;\nextern fn blst_p1_affine_is_equal(a: [*c]const blst_p1_affine, b: [*c]const blst_p1_affine) bool;\nextern fn blst_p1_affine_is_inf(a: [*c]const blst_p1_affine) bool;\nextern fn blst_p1_affine_generator() [*c]const blst_p1_affine;\nconst blst_p2 = extern struct {\n    x: blst_fp2 = @import(\"std\").mem.zeroes(blst_fp2),\n    y: blst_fp2 = @import(\"std\").mem.zeroes(blst_fp2),\n    z: blst_fp2 = @import(\"std\").mem.zeroes(blst_fp2),\n};\nconst blst_p2_affine = extern struct {\n    x: blst_fp2 = @import(\"std\").mem.zeroes(blst_fp2),\n    y: blst_fp2 = @import(\"std\").mem.zeroes(blst_fp2),\n};\nextern fn blst_p2_add(out: [*c]blst_p2, a: [*c]const blst_p2, b: [*c]const blst_p2) void;\nextern fn blst_p2_add_or_double(out: [*c]blst_p2, a: [*c]const blst_p2, b: [*c]const blst_p2) void;\nextern fn blst_p2_add_affine(out: [*c]blst_p2, a: [*c]const blst_p2, b: [*c]const blst_p2_affine) void;\nextern fn blst_p2_add_or_double_affine(out: [*c]blst_p2, a: [*c]const blst_p2, b: [*c]const blst_p2_affine) void;\nextern fn blst_p2_double(out: [*c]blst_p2, a: [*c]const blst_p2) void;\nextern fn blst_p2_mult(out: [*c]blst_p2, p: [*c]const blst_p2, scalar: [*c]const byte, nbits: usize) void;\nextern fn blst_p2_cneg(p: [*c]blst_p2, cbit: bool) void;\nextern fn blst_p2_to_affine(out: [*c]blst_p2_affine, in: [*c]const blst_p2) void;\nextern fn blst_p2_from_affine(out: [*c]blst_p2, in: [*c]const blst_p2_affine) void;\nextern fn blst_p2_on_curve(p: [*c]const blst_p2) bool;\nextern fn blst_p2_in_g2(p: [*c]const blst_p2) bool;\nextern fn blst_p2_is_equal(a: 
[*c]const blst_p2, b: [*c]const blst_p2) bool;\nextern fn blst_p2_is_inf(a: [*c]const blst_p2) bool;\nextern fn blst_p2_generator() [*c]const blst_p2;\nextern fn blst_p2_affine_on_curve(p: [*c]const blst_p2_affine) bool;\nextern fn blst_p2_affine_in_g2(p: [*c]const blst_p2_affine) bool;\nextern fn blst_p2_affine_is_equal(a: [*c]const blst_p2_affine, b: [*c]const blst_p2_affine) bool;\nextern fn blst_p2_affine_is_inf(a: [*c]const blst_p2_affine) bool;\nextern fn blst_p2_affine_generator() [*c]const blst_p2_affine;\nextern fn blst_p1s_to_affine(dst: [*c]blst_p1_affine, points: [*c]const [*c]const blst_p1, npoints: usize) void;\nextern fn blst_p1s_add(ret: [*c]blst_p1, points: [*c]const [*c]const blst_p1_affine, npoints: usize) void;\nextern fn blst_p1s_mult_wbits_precompute_sizeof(wbits: usize, npoints: usize) usize;\nextern fn blst_p1s_mult_wbits_precompute(table: [*c]blst_p1_affine, wbits: usize, points: [*c]const [*c]const blst_p1_affine, npoints: usize) void;\nextern fn blst_p1s_mult_wbits_scratch_sizeof(npoints: usize) usize;\nextern fn blst_p1s_mult_wbits(ret: [*c]blst_p1, table: [*c]const blst_p1_affine, wbits: usize, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t) void;\nextern fn blst_p1s_mult_pippenger_scratch_sizeof(npoints: usize) usize;\nextern fn blst_p1s_mult_pippenger(ret: [*c]blst_p1, points: [*c]const [*c]const blst_p1_affine, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t) void;\nextern fn blst_p1s_tile_pippenger(ret: [*c]blst_p1, points: [*c]const [*c]const blst_p1_affine, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t, bit0: usize, window: usize) void;\nextern fn blst_p2s_to_affine(dst: [*c]blst_p2_affine, points: [*c]const [*c]const blst_p2, npoints: usize) void;\nextern fn blst_p2s_add(ret: [*c]blst_p2, points: [*c]const [*c]const blst_p2_affine, npoints: usize) void;\nextern fn blst_p2s_mult_wbits_precompute_sizeof(wbits: usize, 
npoints: usize) usize;\nextern fn blst_p2s_mult_wbits_precompute(table: [*c]blst_p2_affine, wbits: usize, points: [*c]const [*c]const blst_p2_affine, npoints: usize) void;\nextern fn blst_p2s_mult_wbits_scratch_sizeof(npoints: usize) usize;\nextern fn blst_p2s_mult_wbits(ret: [*c]blst_p2, table: [*c]const blst_p2_affine, wbits: usize, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t) void;\nextern fn blst_p2s_mult_pippenger_scratch_sizeof(npoints: usize) usize;\nextern fn blst_p2s_mult_pippenger(ret: [*c]blst_p2, points: [*c]const [*c]const blst_p2_affine, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t) void;\nextern fn blst_p2s_tile_pippenger(ret: [*c]blst_p2, points: [*c]const [*c]const blst_p2_affine, npoints: usize, scalars: [*c]const [*c]const byte, nbits: usize, scratch: [*c]limb_t, bit0: usize, window: usize) void;\nextern fn blst_map_to_g1(out: [*c]blst_p1, u: [*c]const blst_fp, v: [*c]const blst_fp) void;\nextern fn blst_map_to_g2(out: [*c]blst_p2, u: [*c]const blst_fp2, v: [*c]const blst_fp2) void;\nextern fn blst_encode_to_g1(out: [*c]blst_p1, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) void;\nextern fn blst_hash_to_g1(out: [*c]blst_p1, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) void;\nextern fn blst_encode_to_g2(out: [*c]blst_p2, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) void;\nextern fn blst_hash_to_g2(out: [*c]blst_p2, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) void;\nextern fn blst_p1_serialize(out: [*c]byte, in: [*c]const blst_p1) void;\nextern fn blst_p1_compress(out: [*c]byte, in: [*c]const blst_p1) void;\nextern fn blst_p1_affine_serialize(out: [*c]byte, in: [*c]const blst_p1_affine) void;\nextern fn 
blst_p1_affine_compress(out: [*c]byte, in: [*c]const blst_p1_affine) void;\nextern fn blst_p1_uncompress(out: [*c]blst_p1_affine, in: [*c]const byte) BLST_ERROR;\nextern fn blst_p1_deserialize(out: [*c]blst_p1_affine, in: [*c]const byte) BLST_ERROR;\nextern fn blst_p2_serialize(out: [*c]byte, in: [*c]const blst_p2) void;\nextern fn blst_p2_compress(out: [*c]byte, in: [*c]const blst_p2) void;\nextern fn blst_p2_affine_serialize(out: [*c]byte, in: [*c]const blst_p2_affine) void;\nextern fn blst_p2_affine_compress(out: [*c]byte, in: [*c]const blst_p2_affine) void;\nextern fn blst_p2_uncompress(out: [*c]blst_p2_affine, in: [*c]const byte) BLST_ERROR;\nextern fn blst_p2_deserialize(out: [*c]blst_p2_affine, in: [*c]const byte) BLST_ERROR;\nextern fn blst_keygen(out_SK: [*c]blst_scalar, IKM: [*c]const byte, IKM_len: usize, info: [*c]const byte, info_len: usize) void;\nextern fn blst_sk_to_pk_in_g1(out_pk: [*c]blst_p1, SK: [*c]const blst_scalar) void;\nextern fn blst_sign_pk_in_g1(out_sig: [*c]blst_p2, hash: [*c]const blst_p2, SK: [*c]const blst_scalar) void;\nextern fn blst_sk_to_pk_in_g2(out_pk: [*c]blst_p2, SK: [*c]const blst_scalar) void;\nextern fn blst_sign_pk_in_g2(out_sig: [*c]blst_p1, hash: [*c]const blst_p1, SK: [*c]const blst_scalar) void;\nextern fn blst_miller_loop(ret: [*c]blst_fp12, Q: [*c]const blst_p2_affine, P: [*c]const blst_p1_affine) void;\nextern fn blst_miller_loop_n(ret: [*c]blst_fp12, Qs: [*c]const [*c]const blst_p2_affine, Ps: [*c]const [*c]const blst_p1_affine, n: usize) void;\nextern fn blst_final_exp(ret: [*c]blst_fp12, f: [*c]const blst_fp12) void;\nextern fn blst_precompute_lines(Qlines: [*c]blst_fp6, Q: [*c]const blst_p2_affine) void;\nextern fn blst_miller_loop_lines(ret: [*c]blst_fp12, Qlines: [*c]const blst_fp6, P: [*c]const blst_p1_affine) void;\nextern fn blst_fp12_finalverify(gt1: [*c]const blst_fp12, gt2: [*c]const blst_fp12) bool;\npub const struct_blst_opaque = opaque {};\nconst blst_pairing = struct_blst_opaque;\nextern fn 
blst_pairing_sizeof() usize;\nextern fn blst_pairing_init(new_ctx: ?*blst_pairing, hash_or_encode: bool, DST: [*c]const byte, DST_len: usize) void;\nextern fn blst_pairing_get_dst(ctx: ?*const blst_pairing) [*c]const byte;\nextern fn blst_pairing_commit(ctx: ?*blst_pairing) void;\nextern fn blst_pairing_aggregate_pk_in_g2(ctx: ?*blst_pairing, PK: [*c]const blst_p2_affine, signature: [*c]const blst_p1_affine, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_chk_n_aggr_pk_in_g2(ctx: ?*blst_pairing, PK: [*c]const blst_p2_affine, pk_grpchk: bool, signature: [*c]const blst_p1_affine, sig_grpchk: bool, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_mul_n_aggregate_pk_in_g2(ctx: ?*blst_pairing, PK: [*c]const blst_p2_affine, sig: [*c]const blst_p1_affine, scalar: [*c]const byte, nbits: usize, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_chk_n_mul_n_aggr_pk_in_g2(ctx: ?*blst_pairing, PK: [*c]const blst_p2_affine, pk_grpchk: bool, sig: [*c]const blst_p1_affine, sig_grpchk: bool, scalar: [*c]const byte, nbits: usize, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_aggregate_pk_in_g1(ctx: ?*blst_pairing, PK: [*c]const blst_p1_affine, signature: [*c]const blst_p2_affine, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_chk_n_aggr_pk_in_g1(ctx: ?*blst_pairing, PK: [*c]const blst_p1_affine, pk_grpchk: bool, signature: [*c]const blst_p2_affine, sig_grpchk: bool, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_mul_n_aggregate_pk_in_g1(ctx: ?*blst_pairing, PK: [*c]const blst_p1_affine, sig: [*c]const blst_p2_affine, scalar: [*c]const byte, nbits: usize, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, 
aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_chk_n_mul_n_aggr_pk_in_g1(ctx: ?*blst_pairing, PK: [*c]const blst_p1_affine, pk_grpchk: bool, sig: [*c]const blst_p2_affine, sig_grpchk: bool, scalar: [*c]const byte, nbits: usize, msg: [*c]const byte, msg_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_pairing_merge(ctx: ?*blst_pairing, ctx1: ?*const blst_pairing) BLST_ERROR;\nextern fn blst_pairing_finalverify(ctx: ?*const blst_pairing, gtsig: [*c]const blst_fp12) bool;\nextern fn blst_aggregate_in_g1(out: [*c]blst_p1, in: [*c]const blst_p1, zwire: [*c]const byte) BLST_ERROR;\nextern fn blst_aggregate_in_g2(out: [*c]blst_p2, in: [*c]const blst_p2, zwire: [*c]const byte) BLST_ERROR;\nextern fn blst_aggregated_in_g1(out: [*c]blst_fp12, signature: [*c]const blst_p1_affine) void;\nextern fn blst_aggregated_in_g2(out: [*c]blst_fp12, signature: [*c]const blst_p2_affine) void;\nextern fn blst_core_verify_pk_in_g1(pk: [*c]const blst_p1_affine, signature: [*c]const blst_p2_affine, hash_or_encode: bool, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\nextern fn blst_core_verify_pk_in_g2(pk: [*c]const blst_p2_affine, signature: [*c]const blst_p1_affine, hash_or_encode: bool, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize, aug: [*c]const byte, aug_len: usize) BLST_ERROR;\npub extern const BLS12_381_G1: blst_p1_affine;\npub extern const BLS12_381_NEG_G1: blst_p1_affine;\npub extern const BLS12_381_G2: blst_p2_affine;\npub extern const BLS12_381_NEG_G2: blst_p2_affine;\nextern fn blst_fr_ct_bfly(x0: [*c]blst_fr, x1: [*c]blst_fr, twiddle: [*c]const blst_fr) void;\nextern fn blst_fr_gs_bfly(x0: [*c]blst_fr, x1: [*c]blst_fr, twiddle: [*c]const blst_fr) void;\nextern fn blst_fr_to(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fr_from(ret: [*c]blst_fr, a: [*c]const blst_fr) void;\nextern fn blst_fp_to(ret: [*c]blst_fp, a: [*c]const 
blst_fp) void;\nextern fn blst_fp_from(ret: [*c]blst_fp, a: [*c]const blst_fp) void;\nextern fn blst_fp_is_square(a: [*c]const blst_fp) bool;\nextern fn blst_fp2_is_square(a: [*c]const blst_fp2) bool;\nextern fn blst_p1_from_jacobian(out: [*c]blst_p1, in: [*c]const blst_p1) void;\nextern fn blst_p2_from_jacobian(out: [*c]blst_p2, in: [*c]const blst_p2) void;\nextern fn blst_sk_to_pk2_in_g1(out: [*c]byte, out_pk: [*c]blst_p1_affine, SK: [*c]const blst_scalar) void;\nextern fn blst_sign_pk2_in_g1(out: [*c]byte, out_sig: [*c]blst_p2_affine, hash: [*c]const blst_p2, SK: [*c]const blst_scalar) void;\nextern fn blst_sk_to_pk2_in_g2(out: [*c]byte, out_pk: [*c]blst_p2_affine, SK: [*c]const blst_scalar) void;\nextern fn blst_sign_pk2_in_g2(out: [*c]byte, out_sig: [*c]blst_p1_affine, hash: [*c]const blst_p1, SK: [*c]const blst_scalar) void;\nconst blst_uniq = struct_blst_opaque;\nextern fn blst_uniq_sizeof(n_nodes: usize) usize;\nextern fn blst_uniq_init(tree: ?*blst_uniq) void;\nextern fn blst_uniq_test(tree: ?*blst_uniq, msg: [*c]const byte, len: usize) bool;\nextern fn blst_expand_message_xmd(out: [*c]byte, out_len: usize, msg: [*c]const byte, msg_len: usize, DST: [*c]const byte, DST_len: usize) void;\nextern fn blst_p1_unchecked_mult(out: [*c]blst_p1, p: [*c]const blst_p1, scalar: [*c]const byte, nbits: usize) void;\nextern fn blst_p2_unchecked_mult(out: [*c]blst_p2, p: [*c]const blst_p2, scalar: [*c]const byte, nbits: usize) void;\nextern fn blst_pairing_raw_aggregate(ctx: ?*blst_pairing, q: [*c]const blst_p2_affine, p: [*c]const blst_p1_affine) void;\nextern fn blst_pairing_as_fp12(ctx: ?*blst_pairing) [*c]blst_fp12;\nextern fn blst_bendian_from_fp12(out: [*c]byte, a: [*c]const blst_fp12) void;\nextern fn blst_keygen_v3(out_SK: [*c]blst_scalar, IKM: [*c]const byte, IKM_len: usize, info: [*c]const byte, info_len: usize) void;\nextern fn blst_keygen_v4_5(out_SK: [*c]blst_scalar, IKM: [*c]const byte, IKM_len: usize, salt: [*c]const byte, salt_len: usize, info: [*c]const 
byte, info_len: usize) void;\nextern fn blst_keygen_v5(out_SK: [*c]blst_scalar, IKM: [*c]const byte, IKM_len: usize, salt: [*c]const byte, salt_len: usize, info: [*c]const byte, info_len: usize) void;\nextern fn blst_derive_master_eip2333(out_SK: [*c]blst_scalar, IKM: [*c]const byte, IKM_len: usize) void;\nextern fn blst_derive_child_eip2333(out_SK: [*c]blst_scalar, SK: [*c]const blst_scalar, child_index: u32) void;\nextern fn blst_scalar_from_hexascii(out: [*c]blst_scalar, hex: [*c]const byte) void;\nextern fn blst_fr_from_hexascii(ret: [*c]blst_fr, hex: [*c]const byte) void;\nextern fn blst_fp_from_hexascii(ret: [*c]blst_fp, hex: [*c]const byte) void;\nextern fn blst_p1_sizeof() usize;\nextern fn blst_p1_affine_sizeof() usize;\nextern fn blst_p2_sizeof() usize;\nextern fn blst_p2_affine_sizeof() usize;\nextern fn blst_fp12_sizeof() usize;\nextern fn blst_fp_from_le_bytes(ret: [*c]blst_fp, in: [*c]const byte, len: usize) void;\nextern fn blst_fp_from_be_bytes(ret: [*c]blst_fp, in: [*c]const byte, len: usize) void;\nextern fn blst_sha256(out: [*c]byte, msg: [*c]const byte, msg_len: usize) void;\n// reexport symbols without blst_ prefix\npub const SUCCESS = BLST_SUCCESS;\npub const BAD_ENCODING = BLST_BAD_ENCODING;\npub const POINT_NOT_ON_CURVE = BLST_POINT_NOT_ON_CURVE;\npub const POINT_NOT_IN_GROUP = BLST_POINT_NOT_IN_GROUP;\npub const AGGR_TYPE_MISMATCH = BLST_AGGR_TYPE_MISMATCH;\npub const VERIFY_FAIL = BLST_VERIFY_FAIL;\npub const PK_IS_INFINITY = BLST_PK_IS_INFINITY;\npub const BAD_SCALAR = BLST_BAD_SCALAR;\npub const ERROR = BLST_ERROR;\npub const scalar = blst_scalar;\npub const fr = blst_fr;\npub const fp = blst_fp;\npub const fp2 = blst_fp2;\npub const fp6 = blst_fp6;\npub const fp12 = blst_fp12;\npub const scalar_from_uint32 = blst_scalar_from_uint32;\npub const uint32_from_scalar = blst_uint32_from_scalar;\npub const scalar_from_uint64 = blst_scalar_from_uint64;\npub const uint64_from_scalar = blst_uint64_from_scalar;\npub const scalar_from_bendian = 
blst_scalar_from_bendian;\npub const bendian_from_scalar = blst_bendian_from_scalar;\npub const scalar_from_lendian = blst_scalar_from_lendian;\npub const lendian_from_scalar = blst_lendian_from_scalar;\npub const scalar_fr_check = blst_scalar_fr_check;\npub const sk_check = blst_sk_check;\npub const sk_add_n_check = blst_sk_add_n_check;\npub const sk_sub_n_check = blst_sk_sub_n_check;\npub const sk_mul_n_check = blst_sk_mul_n_check;\npub const sk_inverse = blst_sk_inverse;\npub const scalar_from_le_bytes = blst_scalar_from_le_bytes;\npub const scalar_from_be_bytes = blst_scalar_from_be_bytes;\npub const fr_add = blst_fr_add;\npub const fr_sub = blst_fr_sub;\npub const fr_mul_by_3 = blst_fr_mul_by_3;\npub const fr_lshift = blst_fr_lshift;\npub const fr_rshift = blst_fr_rshift;\npub const fr_mul = blst_fr_mul;\npub const fr_sqr = blst_fr_sqr;\npub const fr_cneg = blst_fr_cneg;\npub const fr_eucl_inverse = blst_fr_eucl_inverse;\npub const fr_inverse = blst_fr_inverse;\npub const fr_from_uint64 = blst_fr_from_uint64;\npub const uint64_from_fr = blst_uint64_from_fr;\npub const fr_from_scalar = blst_fr_from_scalar;\npub const scalar_from_fr = blst_scalar_from_fr;\npub const fp_add = blst_fp_add;\npub const fp_sub = blst_fp_sub;\npub const fp_mul_by_3 = blst_fp_mul_by_3;\npub const fp_mul_by_8 = blst_fp_mul_by_8;\npub const fp_lshift = blst_fp_lshift;\npub const fp_mul = blst_fp_mul;\npub const fp_sqr = blst_fp_sqr;\npub const fp_cneg = blst_fp_cneg;\npub const fp_eucl_inverse = blst_fp_eucl_inverse;\npub const fp_inverse = blst_fp_inverse;\npub const fp_sqrt = blst_fp_sqrt;\npub const fp_from_uint32 = blst_fp_from_uint32;\npub const uint32_from_fp = blst_uint32_from_fp;\npub const fp_from_uint64 = blst_fp_from_uint64;\npub const uint64_from_fp = blst_uint64_from_fp;\npub const fp_from_bendian = blst_fp_from_bendian;\npub const bendian_from_fp = blst_bendian_from_fp;\npub const fp_from_lendian = blst_fp_from_lendian;\npub const lendian_from_fp = 
blst_lendian_from_fp;\npub const fp2_add = blst_fp2_add;\npub const fp2_sub = blst_fp2_sub;\npub const fp2_mul_by_3 = blst_fp2_mul_by_3;\npub const fp2_mul_by_8 = blst_fp2_mul_by_8;\npub const fp2_lshift = blst_fp2_lshift;\npub const fp2_mul = blst_fp2_mul;\npub const fp2_sqr = blst_fp2_sqr;\npub const fp2_cneg = blst_fp2_cneg;\npub const fp2_eucl_inverse = blst_fp2_eucl_inverse;\npub const fp2_inverse = blst_fp2_inverse;\npub const fp2_sqrt = blst_fp2_sqrt;\npub const fp12_sqr = blst_fp12_sqr;\npub const fp12_cyclotomic_sqr = blst_fp12_cyclotomic_sqr;\npub const fp12_mul = blst_fp12_mul;\npub const fp12_mul_by_xy00z0 = blst_fp12_mul_by_xy00z0;\npub const fp12_conjugate = blst_fp12_conjugate;\npub const fp12_inverse = blst_fp12_inverse;\npub const fp12_frobenius_map = blst_fp12_frobenius_map;\npub const fp12_is_equal = blst_fp12_is_equal;\npub const fp12_is_one = blst_fp12_is_one;\npub const fp12_in_group = blst_fp12_in_group;\npub const fp12_one = blst_fp12_one;\npub const p1 = blst_p1;\npub const p1_affine = blst_p1_affine;\npub const p1_add = blst_p1_add;\npub const p1_add_or_double = blst_p1_add_or_double;\npub const p1_add_affine = blst_p1_add_affine;\npub const p1_add_or_double_affine = blst_p1_add_or_double_affine;\npub const p1_double = blst_p1_double;\npub const p1_mult = blst_p1_mult;\npub const p1_cneg = blst_p1_cneg;\npub const p1_to_affine = blst_p1_to_affine;\npub const p1_from_affine = blst_p1_from_affine;\npub const p1_on_curve = blst_p1_on_curve;\npub const p1_in_g1 = blst_p1_in_g1;\npub const p1_is_equal = blst_p1_is_equal;\npub const p1_is_inf = blst_p1_is_inf;\npub const p1_generator = blst_p1_generator;\npub const p1_affine_on_curve = blst_p1_affine_on_curve;\npub const p1_affine_in_g1 = blst_p1_affine_in_g1;\npub const p1_affine_is_equal = blst_p1_affine_is_equal;\npub const p1_affine_is_inf = blst_p1_affine_is_inf;\npub const p1_affine_generator = blst_p1_affine_generator;\npub const p2 = blst_p2;\npub const p2_affine = blst_p2_affine;\npub 
const p2_add = blst_p2_add;\npub const p2_add_or_double = blst_p2_add_or_double;\npub const p2_add_affine = blst_p2_add_affine;\npub const p2_add_or_double_affine = blst_p2_add_or_double_affine;\npub const p2_double = blst_p2_double;\npub const p2_mult = blst_p2_mult;\npub const p2_cneg = blst_p2_cneg;\npub const p2_to_affine = blst_p2_to_affine;\npub const p2_from_affine = blst_p2_from_affine;\npub const p2_on_curve = blst_p2_on_curve;\npub const p2_in_g2 = blst_p2_in_g2;\npub const p2_is_equal = blst_p2_is_equal;\npub const p2_is_inf = blst_p2_is_inf;\npub const p2_generator = blst_p2_generator;\npub const p2_affine_on_curve = blst_p2_affine_on_curve;\npub const p2_affine_in_g2 = blst_p2_affine_in_g2;\npub const p2_affine_is_equal = blst_p2_affine_is_equal;\npub const p2_affine_is_inf = blst_p2_affine_is_inf;\npub const p2_affine_generator = blst_p2_affine_generator;\npub const p1s_to_affine = blst_p1s_to_affine;\npub const p1s_add = blst_p1s_add;\npub const p1s_mult_wbits_precompute_sizeof = blst_p1s_mult_wbits_precompute_sizeof;\npub const p1s_mult_wbits_precompute = blst_p1s_mult_wbits_precompute;\npub const p1s_mult_wbits_scratch_sizeof = blst_p1s_mult_wbits_scratch_sizeof;\npub const p1s_mult_wbits = blst_p1s_mult_wbits;\npub const p1s_mult_pippenger_scratch_sizeof = blst_p1s_mult_pippenger_scratch_sizeof;\npub const p1s_mult_pippenger = blst_p1s_mult_pippenger;\npub const p1s_tile_pippenger = blst_p1s_tile_pippenger;\npub const p2s_to_affine = blst_p2s_to_affine;\npub const p2s_add = blst_p2s_add;\npub const p2s_mult_wbits_precompute_sizeof = blst_p2s_mult_wbits_precompute_sizeof;\npub const p2s_mult_wbits_precompute = blst_p2s_mult_wbits_precompute;\npub const p2s_mult_wbits_scratch_sizeof = blst_p2s_mult_wbits_scratch_sizeof;\npub const p2s_mult_wbits = blst_p2s_mult_wbits;\npub const p2s_mult_pippenger_scratch_sizeof = blst_p2s_mult_pippenger_scratch_sizeof;\npub const p2s_mult_pippenger = blst_p2s_mult_pippenger;\npub const p2s_tile_pippenger = 
blst_p2s_tile_pippenger;\npub const map_to_g1 = blst_map_to_g1;\npub const map_to_g2 = blst_map_to_g2;\npub const encode_to_g1 = blst_encode_to_g1;\npub const hash_to_g1 = blst_hash_to_g1;\npub const encode_to_g2 = blst_encode_to_g2;\npub const hash_to_g2 = blst_hash_to_g2;\npub const p1_serialize = blst_p1_serialize;\npub const p1_compress = blst_p1_compress;\npub const p1_affine_serialize = blst_p1_affine_serialize;\npub const p1_affine_compress = blst_p1_affine_compress;\npub const p1_uncompress = blst_p1_uncompress;\npub const p1_deserialize = blst_p1_deserialize;\npub const p2_serialize = blst_p2_serialize;\npub const p2_compress = blst_p2_compress;\npub const p2_affine_serialize = blst_p2_affine_serialize;\npub const p2_affine_compress = blst_p2_affine_compress;\npub const p2_uncompress = blst_p2_uncompress;\npub const p2_deserialize = blst_p2_deserialize;\npub const keygen = blst_keygen;\npub const sk_to_pk_in_g1 = blst_sk_to_pk_in_g1;\npub const sign_pk_in_g1 = blst_sign_pk_in_g1;\npub const sk_to_pk_in_g2 = blst_sk_to_pk_in_g2;\npub const sign_pk_in_g2 = blst_sign_pk_in_g2;\npub const miller_loop = blst_miller_loop;\npub const miller_loop_n = blst_miller_loop_n;\npub const final_exp = blst_final_exp;\npub const precompute_lines = blst_precompute_lines;\npub const miller_loop_lines = blst_miller_loop_lines;\npub const fp12_finalverify = blst_fp12_finalverify;\npub const pairing = blst_pairing;\npub const pairing_sizeof = blst_pairing_sizeof;\npub const pairing_init = blst_pairing_init;\npub const pairing_get_dst = blst_pairing_get_dst;\npub const pairing_commit = blst_pairing_commit;\npub const pairing_aggregate_pk_in_g2 = blst_pairing_aggregate_pk_in_g2;\npub const pairing_chk_n_aggr_pk_in_g2 = blst_pairing_chk_n_aggr_pk_in_g2;\npub const pairing_mul_n_aggregate_pk_in_g2 = blst_pairing_mul_n_aggregate_pk_in_g2;\npub const pairing_chk_n_mul_n_aggr_pk_in_g2 = blst_pairing_chk_n_mul_n_aggr_pk_in_g2;\npub const pairing_aggregate_pk_in_g1 = 
blst_pairing_aggregate_pk_in_g1;\npub const pairing_chk_n_aggr_pk_in_g1 = blst_pairing_chk_n_aggr_pk_in_g1;\npub const pairing_mul_n_aggregate_pk_in_g1 = blst_pairing_mul_n_aggregate_pk_in_g1;\npub const pairing_chk_n_mul_n_aggr_pk_in_g1 = blst_pairing_chk_n_mul_n_aggr_pk_in_g1;\npub const pairing_merge = blst_pairing_merge;\npub const pairing_finalverify = blst_pairing_finalverify;\npub const aggregate_in_g1 = blst_aggregate_in_g1;\npub const aggregate_in_g2 = blst_aggregate_in_g2;\npub const aggregated_in_g1 = blst_aggregated_in_g1;\npub const aggregated_in_g2 = blst_aggregated_in_g2;\npub const core_verify_pk_in_g1 = blst_core_verify_pk_in_g1;\npub const core_verify_pk_in_g2 = blst_core_verify_pk_in_g2;\npub const fr_ct_bfly = blst_fr_ct_bfly;\npub const fr_gs_bfly = blst_fr_gs_bfly;\npub const fr_to = blst_fr_to;\npub const fr_from = blst_fr_from;\npub const fp_to = blst_fp_to;\npub const fp_from = blst_fp_from;\npub const fp_is_square = blst_fp_is_square;\npub const fp2_is_square = blst_fp2_is_square;\npub const p1_from_jacobian = blst_p1_from_jacobian;\npub const p2_from_jacobian = blst_p2_from_jacobian;\npub const sk_to_pk2_in_g1 = blst_sk_to_pk2_in_g1;\npub const sign_pk2_in_g1 = blst_sign_pk2_in_g1;\npub const sk_to_pk2_in_g2 = blst_sk_to_pk2_in_g2;\npub const sign_pk2_in_g2 = blst_sign_pk2_in_g2;\npub const uniq = blst_uniq;\npub const uniq_sizeof = blst_uniq_sizeof;\npub const uniq_init = blst_uniq_init;\npub const uniq_test = blst_uniq_test;\npub const expand_message_xmd = blst_expand_message_xmd;\npub const p1_unchecked_mult = blst_p1_unchecked_mult;\npub const p2_unchecked_mult = blst_p2_unchecked_mult;\npub const pairing_raw_aggregate = blst_pairing_raw_aggregate;\npub const pairing_as_fp12 = blst_pairing_as_fp12;\npub const bendian_from_fp12 = blst_bendian_from_fp12;\npub const keygen_v3 = blst_keygen_v3;\npub const keygen_v4_5 = blst_keygen_v4_5;\npub const keygen_v5 = blst_keygen_v5;\npub const derive_master_eip2333 = 
blst_derive_master_eip2333;\npub const derive_child_eip2333 = blst_derive_child_eip2333;\npub const scalar_from_hexascii = blst_scalar_from_hexascii;\npub const fr_from_hexascii = blst_fr_from_hexascii;\npub const fp_from_hexascii = blst_fp_from_hexascii;\npub const p1_sizeof = blst_p1_sizeof;\npub const p1_affine_sizeof = blst_p1_affine_sizeof;\npub const p2_sizeof = blst_p2_sizeof;\npub const p2_affine_sizeof = blst_p2_affine_sizeof;\npub const fp12_sizeof = blst_fp12_sizeof;\npub const fp_from_le_bytes = blst_fp_from_le_bytes;\npub const fp_from_be_bytes = blst_fp_from_be_bytes;\npub const sha256 = blst_sha256;\n"
  },
  {
    "path": "bindings/zig/generate.py",
    "content": "#!/usr/bin/env python3\n\nimport os, re, sys, subprocess\n\ntop_zig = \"\"\"\n// Copyright Supranational LLC\n// SPDX-License-Identifier: Apache-2.0\n\nconst std = @import(\"std\");\n\npub const c = @import(\"c.zig\");\n\npub const Error = error{\n    BAD_ENCODING,\n    POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL,\n    PK_IS_INFINITY,\n    BAD_SCALAR,\n    Unknown,\n};\n\npub const ERROR = enum(c.ERROR) {\n    SUCCESS            = c.SUCCESS,\n    BAD_ENCODING       = c.BAD_ENCODING,\n    POINT_NOT_ON_CURVE = c.POINT_NOT_ON_CURVE,\n    POINT_NOT_IN_GROUP = c.POINT_NOT_IN_GROUP,\n    AGGR_TYPE_MISMATCH = c.AGGR_TYPE_MISMATCH,\n    VERIFY_FAIL        = c.VERIFY_FAIL,\n    PK_IS_INFINITY     = c.PK_IS_INFINITY,\n    BAD_SCALAR         = c.BAD_SCALAR,\n\n    pub fn as_error(self: ERROR) Error {\n        return switch (self) {\n            .BAD_ENCODING       => Error.BAD_ENCODING,\n            .POINT_NOT_ON_CURVE => Error.POINT_NOT_ON_CURVE,\n            .POINT_NOT_IN_GROUP => Error.POINT_NOT_IN_GROUP,\n            .AGGR_TYPE_MISMATCH => Error.AGGR_TYPE_MISMATCH,\n            .VERIFY_FAIL        => Error.VERIFY_FAIL,\n            .PK_IS_INFINITY     => Error.PK_IS_INFINITY,\n            .BAD_SCALAR         => Error.BAD_SCALAR,\n            else                => Error.Unknown,\n        };\n    }\n};\n\npub const SecretKey = struct {\n    key: c.scalar = c.scalar{},\n\n    pub fn keygen(self: *SecretKey, IKM: []const u8, info: ?[]const u8) void {\n        const opt = info orelse &[_]u8{};\n        c.keygen(&self.key, @ptrCast(IKM), IKM.len,\n                            @ptrCast(opt), opt.len);\n    }\n\n    pub fn deinit(self: *SecretKey) void {\n        self.key = c.scalar{};\n    }\n};\n\npub const PT = c.fp12;\n\npub const Pairing = struct {\n    ctx: []u64 = &[_]u64{},\n    allocator: std.mem.Allocator,\n\n    pub fn init(hash_or_encode: bool, DST: []const u8,\n                allocator: std.mem.Allocator) !Pairing {\n        const nlimbs = (c.pairing_sizeof() + @sizeOf(u64) - 1) / @sizeOf(u64);\n        const buffer = try allocator.alloc(u64, nlimbs);\n\n        c.pairing_init(@ptrCast(buffer), hash_or_encode, @ptrCast(DST), DST.len);\n\n        return Pairing{\n            .ctx = buffer,\n            .allocator = allocator,\n        };\n    }\n\n    pub fn deinit(self: *Pairing) void {\n        self.allocator.free(self.ctx);\n        self.ctx = &[_]u64{};\n    }\n\n    pub fn aggregate(self: *Pairing, pk: anytype, sig: anytype,\n                     msg: []const u8, aug: ?[]const u8) ERROR {\n        const opt = aug orelse &[_]u8{};\n        var err: c.ERROR = undefined;\n\n        switch (@TypeOf(pk)) {\n            *const P1_Affine, *P1_Affine => {\n                const sigp: [*c]const c.p2_affine = switch (@TypeOf(sig)) {\n                    @TypeOf(null) => null,\n                    else => &sig.point,\n                };\n                err = c.pairing_aggregate_pk_in_g1(@ptrCast(self.ctx),\n                                                   &pk.point, sigp,\n                                                   @ptrCast(msg), msg.len,\n                                                   @ptrCast(opt), opt.len);\n            },\n            *const P2_Affine, *P2_Affine => {\n                const sigp: [*c]const c.p1_affine = switch (@TypeOf(sig)) {\n                    @TypeOf(null) => null,\n                    else => &sig.point,\n                };\n                err = c.pairing_aggregate_pk_in_g2(@ptrCast(self.ctx),\n                                                   &pk.point, sigp,\n                                                   @ptrCast(msg), msg.len,\n                                                   @ptrCast(opt), opt.len);\n            },\n            else => |T| @compileError(\"expected type '*const blst.P1_Affine' \"\n                                      ++ \"or '*const blst.P2_Affine', found '\"\n                                      ++ @typeName(T) ++ \"'\"),\n        }\n\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn commit(self: *Pairing) void {\n        c.pairing_commit(@ptrCast(self.ctx));\n    }\n\n    pub fn merge(self: *Pairing, second: *const Pairing) ERROR {\n        const err = c.pairing_merge(@ptrCast(self.ctx), @ptrCast(second.ctx));\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn finalverify(self: *Pairing, optional: ?*const PT) bool {\n        return c.pairing_finalverify(@ptrCast(self.ctx), optional);\n    }\n\n    pub fn raw_aggregate(self: *Pairing, q: *const P2_Affine,\n                                         p: *const P1_Affine) void {\n        c.pairing_raw_aggregate(@ptrCast(self.ctx), &q.point, &p.point);\n    }\n\n    pub fn as_fp12(self: *Pairing) *const PT {\n        return c.pairing_as_fp12(@ptrCast(self.ctx));\n    }\n};\n\npub const Uniq = struct {\n    tree: []u64 = &[_]u64{},\n    allocator: std.mem.Allocator,\n\n    pub fn init(n: usize, allocator: std.mem.Allocator) !Uniq {\n        const nlimbs = (c.uniq_sizeof(n) + @sizeOf(u64) - 1) / @sizeOf(u64);\n        const buffer = try allocator.alloc(u64, nlimbs);\n\n        c.uniq_init(@ptrCast(buffer));\n\n        return Uniq{\n            .tree = buffer,\n            .allocator = allocator,\n        };\n    }\n\n    pub fn deinit(self: *Uniq) void {\n        self.allocator.free(self.tree);\n        self.tree = &[_]u64{};\n    }\n\n    pub fn is_uniq(self: *Uniq, msg: []const u8) bool {\n        return c.uniq_test(@ptrCast(self.tree), @ptrCast(msg), msg.len);\n    }\n};\n\nconst FP_BYTES = 384/8;\npub const P1_COMPRESS_BYTES  = FP_BYTES;\npub const P1_SERIALIZE_BYTES = FP_BYTES*2;\npub const P2_COMPRESS_BYTES  = FP_BYTES*2;\npub const P2_SERIALIZE_BYTES = FP_BYTES*4;\n\"\"\"\np1_zig = \"\"\"\npub const P1_Affine = struct {\n    point: c.p1_affine = c.p1_affine{},\n\n    pub fn from(in: anytype) !P1_Affine {\n        switch (@TypeOf(in)) {\n            *const P1,\n            *P1  => return in.to_affine(),\n            P1   => @compileError(\"expected type '*const blst.P1', found 'blst.P1'\"),\n            else => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P1_Affine = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn deserialize(self: *P1_Affine, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P1_COMPRESS_BYTES\n                                    else                 P1_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p1_deserialize(&self.point, &in[0]);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn serialize(self: *const P1_Affine) [P1_SERIALIZE_BYTES]u8 {\n        var ret: [P1_SERIALIZE_BYTES]u8 = undefined;\n        c.p1_affine_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P1_Affine) [P1_COMPRESS_BYTES]u8 {\n        var ret: [P1_COMPRESS_BYTES]u8 = undefined;\n        c.p1_affine_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn dup(self: *const P1_Affine) P1_Affine {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P1_Affine) bool {\n        return c.p1_affine_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P1_Affine) bool {\n        return c.p1_affine_in_g1(&self.point);\n    }\n\n    pub fn is_inf(self: *const P1_Affine) bool {\n        return c.p1_affine_is_inf(&self.point);\n    }\n\n    pub fn is_equal(self: *const P1_Affine, p: *const P1_Affine) bool {\n        return c.p1_affine_is_equal(&self.point, &p.point);\n    }\n\n    pub fn core_verify(self: *const P1_Affine, pk: *const P2_Affine,\n                       hash_or_encode: bool, msg: []const u8, DST: []const u8,\n                       aug: ?[]const u8) ERROR {\n        const opt = aug orelse &[_]u8{};\n        const err = c.core_verify_pk_in_g2(&pk.point, &self.point,\n                                           hash_or_encode,\n                                           @ptrCast(msg), msg.len,\n                                           @ptrCast(DST), DST.len,\n                                           @ptrCast(opt), opt.len);\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn generator() P1_Affine {\n        return P1_Affine{\n            .point = c.p1_affine_generator().*,\n        };\n    }\n\n    pub fn to_jacobian(self: *const P1_Affine) P1 {\n        var ret: P1 = undefined;\n        c.p1_from_affine(&ret.point, &self.point);\n        return ret;\n    }\n};\n\npub const P1 = struct {\n    point: c.p1 = c.p1{},\n\n    pub fn from(in: anytype) !P1 {\n        switch (@TypeOf(in)) {\n            *const SecretKey,\n            *SecretKey  => return P1.public_key(in),\n            SecretKey   => @compileError(\"expected type '*const blst.SecretKey', found 'blst.SecretKey'\"),\n            *const P1_Affine,\n            *P1_Affine  => return in.to_jacobian(),\n            P1_Affine   => @compileError(\"expected type '*const blst.P1_Affine', found 'blst.P1_Affine'\"),\n            else        => |T| {\n                switch (@typeInfo(T)) {\n                    .pointer => { const s: []const u8 = in; _ = s; },\n                    else     => @compileError(\"expected type '[]const u8', found '\" ++ @typeName(T) ++ \"'\"),\n                }\n\n                var ret: P1 = undefined;\n                const err = ret.deserialize(in);\n                return if (err == .SUCCESS) ret else err.as_error();\n            },\n        }\n        unreachable;\n    }\n\n    pub fn deserialize(self: *P1, in: []const u8) ERROR {\n        if (in.len == 0) {\n            return .BAD_ENCODING;\n        }\n        const expected = @as(usize, if (in[0]&0x80 != 0) P1_COMPRESS_BYTES\n                                    else                 P1_SERIALIZE_BYTES);\n        if (in.len != expected) {\n            return .BAD_ENCODING;\n        }\n        const err = c.p1_deserialize(@ptrCast(&self.point), &in[0]);\n        if (err == c.SUCCESS) {\n            c.p1_from_affine(&self.point, @ptrCast(&self.point));\n        }\n        return @as(ERROR, @enumFromInt(err));\n    }\n\n    pub fn serialize(self: *const P1) [P1_SERIALIZE_BYTES]u8 {\n        var ret: [P1_SERIALIZE_BYTES]u8 = undefined;\n        c.p1_serialize(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn compress(self: *const P1) [P1_COMPRESS_BYTES]u8 {\n        var ret: [P1_COMPRESS_BYTES]u8 = undefined;\n        c.p1_compress(&ret[0], &self.point);\n        return ret;\n    }\n\n    pub fn public_key(sk: *const SecretKey) P1 {\n        var ret: P1 = undefined;\n        c.sk_to_pk_in_g1(&ret.point, &sk.key);\n        return ret;\n    }\n\n    pub fn dup(self: *const P1) P1 {\n        return self.*;\n    }\n\n    pub fn on_curve(self: *const P1) bool {\n        return c.p1_on_curve(&self.point);\n    }\n\n    pub fn in_group(self: *const P1) bool {\n        return c.p1_in_g1(&self.point);\n    }\n\n    pub fn is_inf(self: *const P1) bool {\n        return c.p1_is_inf(&self.point);\n    }\n\n    pub fn is_equal(self: *const P1, p: *const P1) bool {\n        return c.p1_is_equal(&self.point, &p.point);\n    }\n\n    pub fn aggregate(self: *P1, p: *const P1_Affine) !void {\n        if (!c.p1_affine_in_g1(&p.point)) {\n            return Error.POINT_NOT_IN_GROUP;\n        }\n        c.p1_add_or_double_affine(&self.point, &self.point, &p.point);\n    }\n\n    pub fn hash_to(msg: []const u8, DST: []const u8, aug: ?[]const u8) P1 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P1 = undefined;\n\n        c.hash_to_g1(&ret.point, @ptrCast(msg), msg.len,\n                                 @ptrCast(DST), DST.len,\n                                 @ptrCast(opt), opt.len);\n        return ret;\n    }\n\n    pub fn encode_to(msg: []const u8, DST: []const u8, aug: ?[]const u8) P1 {\n        const opt = aug orelse &[_]u8{};\n        var ret: P1 = undefined;\n\n        c.encode_to_g1(&ret.point, @ptrCast(msg), msg.len,\n                                   @ptrCast(DST), DST.len,\n                                   @ptrCast(opt), opt.len);\n        return ret;\n    }\n\n    pub fn sign_with(self: *const P1, sk: *const SecretKey) *P1 {\n        c.sign_pk_in_g2(@constCast(&self.point), &self.point, &sk.key);\n        return @constCast(self);\n    }\n\n    pub fn to_affine(self: *const P1) P1_Affine {\n        var ret: P1_Affine = undefined;\n        c.p1_to_affine(&ret.point, &self.point);\n        return ret;\n    }\n\n    pub fn generator() P1 {\n        return P1{\n            .point = c.p1_generator().*,\n        };\n    }\n};\n\"\"\"\nhere = re.split(r'/(?=[^/]*$)', sys.argv[0])\nif len(here) > 1:\n    os.chdir(here[0])\n\n\ndef xchg_1vs2(matchobj):\n    if matchobj.group(2) == '1':\n        return matchobj.group(1) + '2'\n    else:\n        return matchobj.group(1) + '1'\n\n\nprint(\"generating blst.zig...\") or sys.stdout.flush()\nwith open(\"blst.zig\", \"w\") as fd:\n    print(\"//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fd)\n    print(\"// DO NOT EDIT THIS FILE!!!\",                         file=fd)\n    print(\"// The file is auto-generated by \" + here[-1],        file=fd)\n    print(\"//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\", file=fd)\n    print(top_zig,                                               file=fd)\n    print(p1_zig,                                                file=fd)\n    print(re.sub(r'((?<!f)[pgPG\\*])([12])', xchg_1vs2, p1_zig),  file=fd)\n\n\ndef newer(*files):\n    assert len(files) > 1\n    rh = files[-1]\n    if not os.path.exists(rh):\n        return True\n    for lh in files[:-1]:\n        if os.stat(lh).st_mtime > os.stat(rh).st_mtime:\n            return True\n    return False\n\n\nif newer(\"../blst.h\", \"c.zig\"):\n    print(\"generating c.zig...\") or sys.stdout.flush()\n    ret = subprocess.run([\"zig\", \"translate-c\", \"../blst.h\", \"-D__BLST_ZIG__\"],\n                         capture_output=True, text=True)\n    with open(\"c.zig\", \"w\") as fd:\n        pubs = {}\n        print(\"// automatically generated with 'zig translate-c'\", file=fd)\n        for line in ret.stdout.splitlines():\n            if \"no file\" in line:\n                break\n            elif not line.startswith(\"pub const _\"):\n                m = re.match(r'^pub ([\\w\\s]+? ((?:blst|BLST)_(\\w+)).*)', line)\n                if m:\n                    print(m.group(1), file=fd)\n                    pubs[m.group(3)] = m.group(2)\n                else:\n                    print(line, file=fd)\n        print(\"// reexport symbols without blst_ prefix\", file=fd)\n        for key, val in pubs.items():\n            print(\"pub const {} = {};\".format(key, val), file=fd)\n        del pubs\n\nversion = \"0.3.16\"\n\nos.chdir(\"../..\")\ntry:\n    with open(\"build.zig.zon\", \"r\") as fd:\n        m = re.search(r'\\.version = \"([^\"]+)\"', fd.read())\n        if m and m.group(1) == version:\n            sys.exit(0)\nexcept OSError as e:\n    if e.errno != 2:    # not \"no such file or directory\"\n        raise e\n\nprint(\"generating build.zig.zon...\") or sys.stdout.flush()\nzon = \"\"\".{\n    .name = .blst,\n    .version = \"%s\",\n    .minimum_zig_version = \"0.14.0\",\n    .paths = .{\n        \"build.zig\",\n        \"build.zig.zon\",\n        \"bindings/zig\",\n        \"src\",\n        \"build\",\n    },\n\"\"\" % version\n\nwith open(\"build.zig.zon\", \"w\") as fd:\n    print(zon, end='', file=fd)\n    print(\"}\", file=fd)\n\nret = subprocess.run([\"zig\", \"build\"], capture_output=True, text=True)\nmatch = re.search(r'suggested value:\\s*(\\w+)', ret.stderr)\nif match:\n    with open(\"build.zig.zon\", \"w\") as fd:\n        print(zon, end='', file=fd)\n        print(\"    .fingerprint = {},\".format(match.group(1)), file=fd)\n        print(\"}\", file=fd)\nelse:\n    print(\"don't know what to do\")\n"
  },
  {
    "path": "bindings/zig/tests.zig",
    "content": "const std = @import(\"std\");\nconst blst = @import(\"blst\");\n\ntest \"sign/verify\" {\n    const password = [_]u8{'*'} ** 32;\n\n    var SK = blst.SecretKey{};\n    SK.keygen(&password, null);\n    defer SK.deinit();\n\n    const msg = \"assertion\";\n    const DST = \"MY-DST\";\n\n    // on the \"sender\" side...\n\n    const pk_for_wire  = (try blst.P1.from(&SK)).serialize();\n    const sig_for_wire = blst.P2.hash_to(msg, DST, &pk_for_wire).sign_with(&SK).serialize();\n\n    // ... and now on the \"receiver\" side...\n\n    const sig = try blst.P2_Affine.from(&sig_for_wire);\n    const pk  = try blst.P1_Affine.from(&pk_for_wire);\n\n    const ret = sig.core_verify(&pk, true, msg, DST, &pk_for_wire);\n\n    try std.testing.expectEqual(ret, .SUCCESS);\n}\n\ntest \"uniq\" {\n    const msgs = &[_][]const u8 {\n        \"three\", \"two\", \"one\", \"three\",\n    };\n\n    var ctx = try blst.Uniq.init(msgs.len, std.testing.allocator);\n    defer ctx.deinit();\n\n    for (msgs, 1..) |msg, next| {\n        try std.testing.expectEqual(ctx.is_uniq(msg), next < msgs.len);\n    }\n}\n\nfn box(allocator: std.mem.Allocator, src: []const u8) ![]u8 {\n    const ret = try allocator.alloc(u8, src.len);\n    @memcpy(ret, src);\n    return ret;\n}\n\ntest \"aggregateverify\" {\n    const mem = std.testing.allocator;\n    const N = 3;\n\n    const password = [_]u8{'*'} ** 32;\n    var SK = blst.SecretKey{};\n    defer SK.deinit();\n\n    // emulate N \"senders\"...\n\n    const DST = \"MY-DST\";\n    var pks: [N][]const u8 = undefined;\n    var sigs: [N][]const u8 = undefined;\n    var msgs: [N][]const u8 = undefined;\n\n    for (0..N) |i| {\n        msgs[i] = try std.fmt.allocPrint(mem, \"assertion{}\", .{i});\n        SK.keygen(&password, msgs[i]);\n        pks[i] = try box(mem, &(try blst.P1.from(&SK)).serialize());\n        sigs[i] = try box(mem, &blst.P2.hash_to(msgs[i], DST, null).sign_with(&SK).serialize());\n    }\n\n    // ... 
basic scheme on the \"receiver\" side.\n\n    var uniq = try blst.Uniq.init(msgs.len, mem);\n    defer uniq.deinit();\n\n    // The basic scheme requires messages to be checked for uniqueness.\n    for (0..N) |i| {\n        try std.testing.expectEqual(uniq.is_uniq(msgs[i]), true);\n    }\n\n    var aggregated = try blst.P2.from(sigs[0]);\n    try std.testing.expectEqual(aggregated.in_group(), true);\n    for (1..N) |i| {\n        try aggregated.aggregate(&try blst.P2_Affine.from(sigs[i]));\n    }\n\n    var ctx = try blst.Pairing.init(true, DST, mem);\n    defer ctx.deinit();\n\n    // The below .aggregate() method doesn't vet public keys with\n    // rationale that application would cache the results of the\n    // group checks. Hence they need to be vetted separately.\n    var pk = try blst.P1_Affine.from(pks[0]);\n    try std.testing.expectEqual(pk.in_group(), true);\n    try std.testing.expectEqual(ctx.aggregate(&pk, &aggregated.to_affine(), msgs[0], null),\n                                .SUCCESS);\n    for (1..N) |i| {\n        pk = try blst.P1_Affine.from(pks[i]);\n        try std.testing.expectEqual(pk.in_group(), true);\n        try std.testing.expectEqual(ctx.aggregate(&pk, null, msgs[i], null),\n                                    .SUCCESS);\n    }\n\n    ctx.commit();\n    try std.testing.expectEqual(ctx.finalverify(null), true);\n\n    for (0..N) |i| {\n        mem.free(pks[i]);\n        mem.free(sigs[i]);\n        mem.free(msgs[i]);\n    }\n}\n"
  },
  {
    "path": "build/assembly.S",
    "content": "#if defined(__x86_64) || defined(__x86_64__)\n# if defined(__ELF__)\n#  if defined(__BLST_PORTABLE__)\n#   include \"elf/sha256-portable-x86_64.s\"\n#   define blst_sha256_block_data_order blst_sha256_block_ssse3\n#  endif\n#  include \"elf/sha256-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"elf/ctx_inverse_mod_384-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"elf/ctq_inverse_mod_384-x86_64.s\"\n#  endif\n#  include \"elf/add_mod_384-x86_64.s\"\n#  include \"elf/add_mod_384x384-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"elf/mulx_mont_384-x86_64.s\"\n#   include \"elf/mulx_mont_256-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"elf/mulq_mont_384-x86_64.s\"\n#   include \"elf/mulq_mont_256-x86_64.s\"\n#  endif\n#  include \"elf/add_mod_256-x86_64.s\"\n#  include \"elf/ct_inverse_mod_256-x86_64.s\"\n#  include \"elf/div3w-x86_64.s\"\n#  include \"elf/ct_is_square_mod_384-x86_64.s\"\n# elif defined(_WIN64) || defined(__CYGWIN__)\n#  if defined(__BLST_PORTABLE__)\n#   include \"coff/sha256-portable-x86_64.s\"\n#   define blst_sha256_block_data_order                     blst_sha256_block_ssse3\n#   define LSEH_begin_blst_sha256_block_data_order          LSEH_begin_blst_sha256_block_ssse3\n#   define LSEH_body_blst_sha256_block_data_order           LSEH_body_blst_sha256_block_ssse3\n#   define LSEH_info_blst_sha256_block_data_order_prologue  LSEH_info_blst_sha256_block_ssse3_prologue\n#   define LSEH_body_blst_sha256_block_data_order           LSEH_body_blst_sha256_block_ssse3\n#   define LSEH_epilogue_blst_sha256_block_data_order       LSEH_epilogue_blst_sha256_block_ssse3\n#   define LSEH_info_blst_sha256_block_data_order_body      LSEH_info_blst_sha256_block_ssse3_body\n#   define LSEH_epilogue_blst_sha256_block_data_order       LSEH_epilogue_blst_sha256_block_ssse3\n#   define 
LSEH_end_blst_sha256_block_data_order            LSEH_end_blst_sha256_block_ssse3\n#   define LSEH_info_blst_sha256_block_data_order_epilogue  LSEH_info_blst_sha256_block_ssse3_epilogue\n#  endif\n#  include \"coff/sha256-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"coff/ctx_inverse_mod_384-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"coff/ctq_inverse_mod_384-x86_64.s\"\n#  endif\n#  include \"coff/add_mod_384-x86_64.s\"\n#  include \"coff/add_mod_384x384-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"coff/mulx_mont_384-x86_64.s\"\n#   include \"coff/mulx_mont_256-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"coff/mulq_mont_384-x86_64.s\"\n#   include \"coff/mulq_mont_256-x86_64.s\"\n#  endif\n#  include \"coff/add_mod_256-x86_64.s\"\n#  include \"coff/ct_inverse_mod_256-x86_64.s\"\n#  include \"coff/div3w-x86_64.s\"\n#  include \"coff/ct_is_square_mod_384-x86_64.s\"\n# elif defined(__APPLE__)\n#  include \"mach-o/sha256-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"mach-o/ctx_inverse_mod_384-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"mach-o/ctq_inverse_mod_384-x86_64.s\"\n#  endif\n#  include \"mach-o/add_mod_384-x86_64.s\"\n#  include \"mach-o/add_mod_384x384-x86_64.s\"\n#  if defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"mach-o/mulx_mont_384-x86_64.s\"\n#   include \"mach-o/mulx_mont_256-x86_64.s\"\n#  endif\n#  if !defined(__ADX__) || defined(__BLST_PORTABLE__)\n#   include \"mach-o/mulq_mont_384-x86_64.s\"\n#   include \"mach-o/mulq_mont_256-x86_64.s\"\n#  endif\n#  include \"mach-o/add_mod_256-x86_64.s\"\n#  include \"mach-o/ct_inverse_mod_256-x86_64.s\"\n#  include \"mach-o/div3w-x86_64.s\"\n#  include \"mach-o/ct_is_square_mod_384-x86_64.s\"\n# endif\n#elif defined(__aarch64__)\n# if 
defined(__CHERI_PURE_CAPABILITY__)\n#  include \"cheri/sha256-armv8.S\"\n#  include \"cheri/ct_inverse_mod_384-armv8.S\"\n#  include \"cheri/add_mod_384-armv8.S\"\n#  define __add_mod_384     __add_mont_384\n#  define __sub_mod_384     __sub_mont_384\n#  include \"cheri/mul_mont_384-armv8.S\"\n#  include \"cheri/mul_mont_256-armv8.S\"\n#  include \"cheri/add_mod_256-armv8.S\"\n#  include \"cheri/ct_inverse_mod_256-armv8.S\"\n#  include \"cheri/div3w-armv8.S\"\n#  include \"cheri/ct_is_square_mod_384-armv8.S\"\n# elif defined(__ELF__)\n#  include \"elf/sha256-armv8.S\"\n#  include \"elf/ct_inverse_mod_384-armv8.S\"\n#  include \"elf/add_mod_384-armv8.S\"\n#  define __add_mod_384     __add_mont_384\n#  define __sub_mod_384     __sub_mont_384\n#  include \"elf/mul_mont_384-armv8.S\"\n#  include \"elf/mul_mont_256-armv8.S\"\n#  include \"elf/add_mod_256-armv8.S\"\n#  include \"elf/ct_inverse_mod_256-armv8.S\"\n#  include \"elf/div3w-armv8.S\"\n#  include \"elf/ct_is_square_mod_384-armv8.S\"\n# elif defined(_WIN64)\n#  include \"coff/sha256-armv8.S\"\n#  include \"coff/ct_inverse_mod_384-armv8.S\"\n#  include \"coff/add_mod_384-armv8.S\"\n#  define __add_mod_384     __add_mont_384\n#  define __sub_mod_384     __sub_mont_384\n#  include \"coff/mul_mont_384-armv8.S\"\n#  include \"coff/mul_mont_256-armv8.S\"\n#  include \"coff/add_mod_256-armv8.S\"\n#  include \"coff/ct_inverse_mod_256-armv8.S\"\n#  include \"coff/div3w-armv8.S\"\n#  include \"coff/ct_is_square_mod_384-armv8.S\"\n# elif defined(__APPLE__)\n#  include \"mach-o/sha256-armv8.S\"\n#  include \"mach-o/ct_inverse_mod_384-armv8.S\"\n#  include \"mach-o/add_mod_384-armv8.S\"\n#  define __add_mod_384     __add_mont_384\n#  define __sub_mod_384     __sub_mont_384\n#  include \"mach-o/mul_mont_384-armv8.S\"\n#  include \"mach-o/mul_mont_256-armv8.S\"\n#  include \"mach-o/add_mod_256-armv8.S\"\n#  include \"mach-o/ct_inverse_mod_256-armv8.S\"\n#  include \"mach-o/div3w-armv8.S\"\n#  include 
\"mach-o/ct_is_square_mod_384-armv8.S\"\n# endif\n#elif defined(__BLST_NO_ASM__) || \\\n      (defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__==4)\n/* inaccurate way to detect a 32-bit processor, but it's close enough */\n#else\n# error \"unsupported platform\"\n#endif\n"
  },
  {
    "path": "build/bindings_trim.pl",
    "content": "#!/usr/bin/env perl\n\n# read whole file\nwhile(<>) { push @file, $_; }\n\n# traverse and remove auto-generated PartialEq for chosen types\nfor (my $i = 0; $i <= $#file; $i++) {\n    if (@file[$i] =~ m/pub\\s+(?:struct|enum)\\s+(\\w+)/) {\n        push @structs, $1;\n    }\n\n    if (@file[$i] =~ m/struct\\s+blst_p[12]/) {\n        @file[$i-1] =~ s/,\\s*PartialEq//;\n    } elsif (@file[$i] =~ m/struct\\s+blst_fp12/) {\n        @file[$i-1] =~ s/,\\s*(?:Default|PartialEq)//g;\n    } elsif (@file[$i] =~ m/struct\\s+(blst_pairing|blst_uniq)/) {\n        @file[$i-1] =~ s/,\\s*(?:Copy|Clone|Eq|PartialEq)//g;\n    } elsif (@file[$i] =~ m/struct\\s+blst_scalar/) {\n        @file[$i-1] =~ s/,\\s*Copy//;\n        @file[$i-1] =~ s/\\)/, Zeroize\\)/;\n        splice @file, $i, 0, \"#[zeroize(drop)]\\n\"; $i++;\n    } else {\n        @file[$i] =~ s/::std::/::core::/g;\n    }\n}\n\nprint @file;\n\nprint << '___';\n#[test]\nfn bindgen_test_normal_types() {\n    // from \"Rust for Rustaceans\" by Jon Gjengset\n    fn is_normal<T: Sized + Send + Sync + Unpin>() {}\n___\nfor (@structs) {\n    print \"    is_normal::<$_>();\\n\";\n}\nprint \"}\\n\";\n\nclose STDOUT;\n"
  },
  {
    "path": "build/cheri/add_mod_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_256\n.hidden\tadd_mod_256\n.type\tadd_mod_256,%function\n.align\t5\nadd_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[c1]\n\tldp\tx12,x13,[c2]\n\n\tldp\tx10,x11,[c1,#16]\n\tadds\tx8,x8,x12\n\tldp\tx14,x15,[c2,#16]\n\tadcs\tx9,x9,x13\n\tldp\tx4,x5,[c3]\n\tadcs\tx10,x10,x14\n\tldp\tx6,x7,[c3,#16]\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[c0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\tadd_mod_256,.-add_mod_256\n\n.globl\tmul_by_3_mod_256\n.hidden\tmul_by_3_mod_256\n.type\tmul_by_3_mod_256,%function\n.align\t5\nmul_by_3_mod_256:\n\thint\t#34\n\tldp\tx12,x13,[c1]\n\tldp\tx14,x15,[c1,#16]\n\n\tadds\tx8,x12,x12\n\tldp\tx4,x5,[c2]\n\tadcs\tx9,x13,x13\n\tldp\tx6,x7,[c2,#16]\n\tadcs\tx10,x14,x14\n\tadcs\tx11,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\tadds\tx8,x8,x12\n\tadcs\tx9,x9,x13\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[c0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\tmul_by_3_mod_256,.-mul_by_3_mod_256\n\n.globl\tlshift_mod_256\n.hidden\tlshift_mod_256\n.type\tlshift_mod_256,%function\n.align\t5\nlshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[c1]\n\tldp\tx10,x11,[c1,#16]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\n.Loop_lshift_mod_256:\n\tadds\tx8,x8,x8\n\tsub\tx2,x2,#1
\n\tadcs\tx9,x9,x9\n\tadcs\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx12,x8,x4\n\tsbcs\tx13,x9,x5\n\tsbcs\tx14,x10,x6\n\tsbcs\tx15,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x12,lo\n\tcsel\tx9,x9,x13,lo\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\n\tcbnz\tx2,.Loop_lshift_mod_256\n\n\tstp\tx8,x9,[c0]\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\tlshift_mod_256,.-lshift_mod_256\n\n.globl\trshift_mod_256\n.hidden\trshift_mod_256\n.type\trshift_mod_256,%function\n.align\t5\nrshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[c1]\n\tldp\tx10,x11,[c1,#16]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\n.Loop_rshift:\n\tadds\tx12,x8,x4\n\tsub\tx2,x2,#1\n\tadcs\tx13,x9,x5\n\tadcs\tx14,x10,x6\n\tadcs\tx15,x11,x7\n\tadc\tx3,xzr,xzr\n\ttst\tx8,#1\n\n\tcsel\tx12,x12,x8,ne\n\tcsel\tx13,x13,x9,ne\n\tcsel\tx14,x14,x10,ne\n\tcsel\tx15,x15,x11,ne\n\tcsel\tx3,x3,xzr,ne\n\n\textr\tx8,x13,x12,#1\n\textr\tx9,x14,x13,#1\n\textr\tx10,x15,x14,#1\n\textr\tx11,x3,x15,#1\n\n\tcbnz\tx2,.Loop_rshift\n\n\tstp\tx8,x9,[c0]\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\trshift_mod_256,.-rshift_mod_256\n\n.globl\tcneg_mod_256\n.hidden\tcneg_mod_256\n.type\tcneg_mod_256,%function\n.align\t5\ncneg_mod_256:\n\tldp\tx8,x9,[c1]\n\tldp\tx4,x5,[c3]\n\n\tldp\tx10,x11,[c1,#16]\n\tsubs\tx12,x4,x8\n\tldp\tx6,x7,[c3,#16]\n\torr\tx4,x8,x9\n\tsbcs\tx13,x5,x9\n\torr\tx5,x10,x11\n\tsbcs\tx14,x6,x10\n\torr\tx3,x4,x5\n\tsbc\tx15,x7,x11\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx8,x8,x12,eq\n\tcsel\tx9,x9,x13,eq\n\tcsel\tx10,x10,x14,eq\n\tstp\tx8,x9,[c0]\n\tcsel\tx11,x11,x15,eq\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\tcneg_mod_256,.-cneg_mod_256\n\n.globl\tsub_mod_256\n.hidden\tsub_mod_256\n.type\tsub_mod_256,%function\n.align\t5\nsub_mod_256:\n\tldp\tx8,x9,[c1]\n\tldp\tx12,x13,[c2]\n\n\tldp\tx10,x11,[c1,#16]\n\tsubs\tx8,x8,x12\n\tldp\tx14,x15,[c2,#16]\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[c3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[c3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4
,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tstp\tx8,x9,[c0]\n\tadc\tx11,x11,x7\n\tstp\tx10,x11,[c0,#16]\n\n\tret\n.size\tsub_mod_256,.-sub_mod_256\n\n.globl\tcheck_mod_256\n.hidden\tcheck_mod_256\n.type\tcheck_mod_256,%function\n.align\t5\ncheck_mod_256:\n\tldp\tx8,x9,[c0]\n\tldp\tx10,x11,[c0,#16]\n\tldp\tx4,x5,[c1]\n\tldp\tx6,x7,[c1,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tsubs\txzr,x8,x4\n\tsbcs\txzr,x9,x5\n\torr\tx8,x8,x9\n\tsbcs\txzr,x10,x6\n\torr\tx8,x8,x10\n\tsbcs\txzr,x11,x7\n\torr\tx8,x8,x11\n\tsbc\tx1,xzr,xzr\n\n\tcmp\tx8,#0\n\tmov\tx0,#1\n\tcsel\tx0,x0,xzr,ne\n\tand\tx0,x0,x1\n\n\tret\n.size\tcheck_mod_256,.-check_mod_256\n\n.globl\tadd_n_check_mod_256\n.hidden\tadd_n_check_mod_256\n.type\tadd_n_check_mod_256,%function\n.align\t5\nadd_n_check_mod_256:\n\tldp\tx8,x9,[c1]\n\tldp\tx12,x13,[c2]\n\tldp\tx10,x11,[c1,#16]\n\tldp\tx14,x15,[c2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tadds\tx8,x8,x12\n\tldp\tx4,x5,[c3]\n\tadcs\tx9,x9,x13\n\tldp\tx6,x7,[c3,#16]\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[c0]\n\tstp\tx10,x11,[c0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, 
ne\n\n\tret\n.size\tadd_n_check_mod_256,.-add_n_check_mod_256\n\n.globl\tsub_n_check_mod_256\n.hidden\tsub_n_check_mod_256\n.type\tsub_n_check_mod_256,%function\n.align\t5\nsub_n_check_mod_256:\n\tldp\tx8,x9,[c1]\n\tldp\tx12,x13,[c2]\n\tldp\tx10,x11,[c1,#16]\n\tldp\tx14,x15,[c2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tsubs\tx8,x8,x12\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[c3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[c3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tadc\tx11,x11,x7\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[c0]\n\tstp\tx10,x11,[c0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n.size\tsub_n_check_mod_256,.-sub_n_check_mod_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/add_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_384\n.hidden\tadd_mod_384\n.type\tadd_mod_384,%function\n.align\t5\nadd_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n\tbl\t__add_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384,.-add_mod_384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\tx10,x11,[c1]\n\tldp\tx16,x17,[c2]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx19,x20,[c2,#16]\n\tldp\tx14,x15,[c1,#32]\n\tldp\tx21,x22,[c2,#32]\n\n__add_mod_384_ab_are_loaded:\n\tadds\tx10,x10,x16\n\tadcs\tx11,x11,x17\n\tadcs\tx12,x12,x19\n\tadcs\tx13,x13,x20\n\tadcs\tx14,x14,x21\n\tadcs\tx15,x15,x22\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.globl\tadd_mod_384x\n.hidden\tadd_mod_384x\n.type\tadd_mod_384x,%function\n.align\t5\nadd_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\tx10,x11,[c0]\n\tadd\tc1,c1,#48\n\tstp\tx
12,x13,[c0,#16]\n\tadd\tc2,c2,#48\n\tstp\tx14,x15,[c0,#32]\n\n\tbl\t__add_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0,#48]\n\tstp\tx12,x13,[c0,#64]\n\tstp\tx14,x15,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384x,.-add_mod_384x\n\n.globl\trshift_mod_384\n.hidden\trshift_mod_384\n.type\trshift_mod_384,%function\n.align\t5\nrshift_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n.Loop_rshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__rshift_mod_384\n\tcbnz\tx2,.Loop_rshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\trshift_mod_384,.-rshift_mod_384\n\n.type\t__rshift_mod_384,%function\n.align\t5\n__rshift_mod_384:\n\tsbfx\tx22,x10,#0,#1\n\tand\tx16,x22,x4\n\tand\tx17,x22,x5\n\tadds\tx10,x10,x16\n\tand\tx19,x22,x6\n\tadcs\tx11,x11,x17\n\tand\tx20,x22,x7\n\tadcs\tx12,x12,x19\n\tand\tx21,x22,x8\n\tadcs\tx13,x13,x20\n\tand\tx22,x22,x9\n\tadcs\tx14,x14,x21\n\textr\tx10,x11,x10,#1\t// a[0:5] >>= 
1\n\tadcs\tx15,x15,x22\n\textr\tx11,x12,x11,#1\n\tadc\tx22,xzr,xzr\n\textr\tx12,x13,x12,#1\n\textr\tx13,x14,x13,#1\n\textr\tx14,x15,x14,#1\n\textr\tx15,x22,x15,#1\n\tret\n.size\t__rshift_mod_384,.-__rshift_mod_384\n\n.globl\tdiv_by_2_mod_384\n.hidden\tdiv_by_2_mod_384\n.type\tdiv_by_2_mod_384,%function\n.align\t5\ndiv_by_2_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tdiv_by_2_mod_384,.-div_by_2_mod_384\n\n.globl\tlshift_mod_384\n.hidden\tlshift_mod_384\n.type\tlshift_mod_384,%function\n.align\t5\nlshift_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n.Loop_lshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__lshift_mod_384\n\tcbnz\tx2,.Loop_lshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tlshift_mod_384,.-lshift_mod_384\n\n.type\t__lshift_mod_384,%function\n.align\t5\n__lshift_mod_384:\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx1
5,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n.size\t__lshift_mod_384,.-__lshift_mod_384\n\n.globl\tmul_by_3_mod_384\n.hidden\tmul_by_3_mod_384\n.type\tmul_by_3_mod_384,%function\n.align\t5\nmul_by_3_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[c1]\n\tldp\tx19,x20,[c1,#16]\n\tldp\tx21,x22,[c1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_3_mod_384,.-mul_by_3_mod_384\n\n.globl\tmul_by_8_mod_384\n.hidden\tmul_by_8_mod_384\n.type\tmul_by_8_mod_384,%function\n.align\t5\nmul_by_8_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n
\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_8_mod_384,.-mul_by_8_mod_384\n\n.globl\tmul_by_3_mod_384x\n.hidden\tmul_by_3_mod_384x\n.type\tmul_by_3_mod_384x,%function\n.align\t5\nmul_by_3_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[c1]\n\tldp\tx19,x20,[c1,#16]\n\tldp\tx21,x22,[c1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\tx10,x11,[c0]\n\tldp\tx10,x11,[c1,#48]\n\tstp\tx12,x13,[c0,#16]\n\tldp\tx12,x13,[c1,#64]\n\tstp\tx14,x15,[c0,#32]\n\tldp\tx14,x15,[c1,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[c1,#48]\n\tldp\tx19,x20,[c1,#64]\n\tldp\tx21,x22,[c1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0,#48]\n\tstp\tx12,x13,[c0,#64]\n\tstp\tx14,x15,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_3_mod_384x,.-mul_by_3_mod_384x\n\n.globl\tmul_by_8_mod_384x\n.hidden\tmul_by_8_mod_384x\n.type\tmul_by_8_mod_384x,%function\n.align\t5\nmul_by_8_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx14,x15,[c1,#32]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\tx10,x11,[c0]\n\tldp\tx10,x11,[c1,#48]\n\tstp\tx12,x13,[c0,#16]\n\tldp\tx12,x13,[c1,#64]\n\tstp\tx14,x15,[c0,#32]\n\tldp\tx14,x15,[c1,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tc30,[csp,#
__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0,#48]\n\tstp\tx12,x13,[c0,#64]\n\tstp\tx14,x15,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_8_mod_384x,.-mul_by_8_mod_384x\n\n.globl\tcneg_mod_384\n.hidden\tcneg_mod_384\n.type\tcneg_mod_384,%function\n.align\t5\ncneg_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldp\tx4,x5,[c3]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx6,x7,[c3,#16]\n\n\tsubs\tx16,x4,x10\n\tldp\tx14,x15,[c1,#32]\n\tldp\tx8,x9,[c3,#32]\n\torr\tx3,x10,x11\n\tsbcs\tx17,x5,x11\n\torr\tx3,x3,x12\n\tsbcs\tx19,x6,x12\n\torr\tx3,x3,x13\n\tsbcs\tx20,x7,x13\n\torr\tx3,x3,x14\n\tsbcs\tx21,x8,x14\n\torr\tx3,x3,x15\n\tsbc\tx22,x9,x15\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx10,x10,x16,eq\n\tcsel\tx11,x11,x17,eq\n\tcsel\tx12,x12,x19,eq\n\tcsel\tx13,x13,x20,eq\n\tstp\tx10,x11,[c0]\n\tcsel\tx14,x14,x21,eq\n\tstp\tx12,x13,[c0,#16]\n\tcsel\tx15,x15,x22,eq\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tcneg_mod_384,.-cneg_mod_384\n\n.globl\tsub_mod_384\n.hidden\tsub_mod_384\n.type\tsub_mod_384,%function\n.align\t5\nsub_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\tstp\tx14,x15,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t
#AUTI_HINT\n\tret\n.size\tsub_mod_384,.-sub_mod_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\tx10,x11,[c1]\n\tldp\tx16,x17,[c2]\n\tldp\tx12,x13,[c1,#16]\n\tldp\tx19,x20,[c2,#16]\n\tldp\tx14,x15,[c1,#32]\n\tldp\tx21,x22,[c2,#32]\n\n\tsubs\tx10,x10,x16\n\tsbcs\tx11,x11,x17\n\tsbcs\tx12,x12,x19\n\tsbcs\tx13,x13,x20\n\tsbcs\tx14,x14,x21\n\tsbcs\tx15,x15,x22\n\tsbc\tx3,xzr,xzr\n\n\tand\tx16,x4,x3\n\tand\tx17,x5,x3\n\tadds\tx10,x10,x16\n\tand\tx19,x6,x3\n\tadcs\tx11,x11,x17\n\tand\tx20,x7,x3\n\tadcs\tx12,x12,x19\n\tand\tx21,x8,x3\n\tadcs\tx13,x13,x20\n\tand\tx22,x9,x3\n\tadcs\tx14,x14,x21\n\tadc\tx15,x15,x22\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tsub_mod_384x\n.hidden\tsub_mod_384x\n.type\tsub_mod_384x,%function\n.align\t5\nsub_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[c3]\n\tldp\tx6,x7,[c3,#16]\n\tldp\tx8,x9,[c3,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\tx10,x11,[c0]\n\tadd\tc1,c1,#48\n\tstp\tx12,x13,[c0,#16]\n\tadd\tc2,c2,#48\n\tstp\tx14,x15,[c0,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0,#48]\n\tstp\tx12,x13,[c0,#64]\n\tstp\tx14,x15,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsub_mod_384x,.-sub_mod_384x\n\n.globl\tmul_by_1_plus_i_mod_384x\n.hidden\tmul_by_1_plus_i_mod_384x\n.type\tmul_by_1_plus_i_mod_384x,%function\n.align\t5\nmul_by_1_plus_i_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[c2]\n\tldp\tx6,x7,[c2,#16]\n\tldp\tx8,x9,[c2,#32]\n\tadd\tc2,c1,#48\n\n\tbl\t__sub_mod_384\t\t\t// a->re - 
a->im\n\n\tldp\tx16,x17,[c1]\n\tldp\tx19,x20,[c1,#16]\n\tldp\tx21,x22,[c1,#32]\n\tstp\tx10,x11,[c0]\n\tldp\tx10,x11,[c1,#48]\n\tstp\tx12,x13,[c0,#16]\n\tldp\tx12,x13,[c1,#64]\n\tstp\tx14,x15,[c0,#32]\n\tldp\tx14,x15,[c1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\t// a->re + a->im\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[c0,#48]\n\tstp\tx12,x13,[c0,#64]\n\tstp\tx14,x15,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x\n\n.globl\tsgn0_pty_mod_384\n.hidden\tsgn0_pty_mod_384\n.type\tsgn0_pty_mod_384,%function\n.align\t5\nsgn0_pty_mod_384:\n\thint\t#34\n\tldp\tx10,x11,[c0]\n\tldp\tx12,x13,[c0,#16]\n\tldp\tx14,x15,[c0,#32]\n\n\tldp\tx4,x5,[c1]\n\tldp\tx6,x7,[c1,#16]\n\tldp\tx8,x9,[c1,#32]\n\n\tand\tx0,x10,#1\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx3,x3,xzr\n\n\tmvn\tx3,x3\n\tand\tx3,x3,#2\n\torr\tx0,x0,x3\n\n\tret\n.size\tsgn0_pty_mod_384,.-sgn0_pty_mod_384\n\n.globl\tsgn0_pty_mod_384x\n.hidden\tsgn0_pty_mod_384x\n.type\tsgn0_pty_mod_384x,%function\n.align\t5\nsgn0_pty_mod_384x:\n\thint\t#34\n\tldp\tx10,x11,[c0]\n\tldp\tx12,x13,[c0,#16]\n\tldp\tx14,x15,[c0,#32]\n\n\tldp\tx4,x5,[c1]\n\tldp\tx6,x7,[c1,#16]\n\tldp\tx8,x9,[c1,#32]\n\n\tand\tx2,x10,#1\n\torr\tx3,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx3,x3,x12\n\tadcs\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tldp\tx10,x11,[c0,#48]\n\tldp\t
x12,x13,[c0,#64]\n\tldp\tx14,x15,[c0,#80]\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx2,x2,x16\n\n\tand\tx0,x10,#1\n\torr\tx1,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx1,x1,x12\n\tadcs\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx0,x0,x16\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t// pack sign and parity\n\n\tret\n.size\tsgn0_pty_mod_384x,.-sgn0_pty_mod_384x\n.globl\tvec_select_32\n.hidden\tvec_select_32\n.type\tvec_select_32,%function\n.align\t5\nvec_select_32:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d}, [c1]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [c2]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [c0]\n\tret\n.size\tvec_select_32,.-vec_select_32\n.globl\tvec_select_48\n.hidden\tvec_select_48\n.type\tvec_select_48,%function\n.align\t5\nvec_select_48:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0]\n\tret\n.size\tvec_select_48,.-vec_select_48\n.globl\tvec_select_96\n.hidden\tvec_select_96\n.type\tvec_select_96,%function\n.align\t5\nvec_select_96:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, 
[c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0]\n\tret\n.size\tvec_select_96,.-vec_select_96\n.globl\tvec_select_192\n.hidden\tvec_select_192\n.type\tvec_select_192,%function\n.align\t5\nvec_select_192:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0]\n\tret\n.size\tvec_select_192,.-vec_select_192\n.globl\tvec_select_144\n.hidden\tvec_select_144\n.type\tvec_select_144,%function\n.align\t5\nvec_select_144:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv18.16b, v21.16b, 
v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0]\n\tret\n.size\tvec_select_144,.-vec_select_144\n.globl\tvec_select_288\n.hidden\tvec_select_288\n.type\tvec_select_288,%function\n.align\t5\nvec_select_288:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [c1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [c2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [c1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [c2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [c0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [c0]\n\tret\n.size\tvec_select_288,.-vec_select_288\n.globl\tvec_prefetch\n.hidden\tvec_prefetch\n.type\tvec_prefetch,%function\n.align\t5\nvec_prefetch:\n\thint\t#34\n\tadd\tx1, x1, x0\n\tsub\tx1, x1, #1\n\tmov\tx2, #64\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, 
x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [c0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tprfm\tpldl1keep, [c0]\n\tret\n.size\tvec_prefetch,.-vec_prefetch\n.globl\tvec_is_zero_16x\n.hidden\tvec_is_zero_16x\n.type\tvec_is_zero_16x,%function\n.align\t5\nvec_is_zero_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [c0], #16\n\tlsr\tx1, x1, #4\n\tsub\tx1, x1, #1\n\tcbz\tx1, .Loop_is_zero_done\n\n.Loop_is_zero:\n\tld1\t{v1.2d}, [c0], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\tx1, x1, #1\n\tcbnz\tx1, .Loop_is_zero\n\n.Loop_is_zero_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_zero_16x,.-vec_is_zero_16x\n.globl\tvec_is_equal_16x\n.hidden\tvec_is_equal_16x\n.type\tvec_is_equal_16x,%function\n.align\t5\nvec_is_equal_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [c0], #16\n\tld1\t{v1.2d}, [c1], #16\n\tlsr\tx2, x2, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\n.Loop_is_equal:\n\tsub\tx2, x2, #1\n\tcbz\tx2, .Loop_is_equal_done\n\tld1\t{v1.2d}, [c0], #16\n\tld1\t{v2.2d}, [c1], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\t.Loop_is_equal\n\tnop\n\n.Loop_is_equal_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_equal_16x,.-vec_is_equal_16x\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || 
defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/ct_inverse_mod_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_inverse_mod_256\n.hidden\tct_inverse_mod_256\n.type\tct_inverse_mod_256, %function\n.align\t5\nct_inverse_mod_256:\n\thint\t#PACI_HINT\n\tstp\tc29, c30, [csp,#-10*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #1040\n\n\tldp\tx4, x5, [c1,#8*0]\n\tldp\tx6, x7, [c1,#8*2]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tc1,csp,#16+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #16+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstr\tc0, [csp]\t\t// offload out_ptr\n\n\tldp\tx8, x9, [c2,#8*0]\n\tldp\tx10, x11, [c2,#8*2]\n\n\tstp\tx4, x5, [c1,#8*0]\t// copy input to |a|\n\tstp\tx6, x7, [c1,#8*2]\n\tstp\tx8, x9, [c1,#8*4]\t// copy modulus to |b|\n\tstp\tx10, x11, [c1,#8*6]\n\n\t////////////////////////////////////////// first iteration\n\tbl\t.Lab_approximation_31_256_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12,[c0,#8*8]\t\t// initialize |u| with |f0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12, [c0,#8*10]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\tx8, [c1,#8*8]\t\t// |u|\n\tldr\tx9, [c1,#8*14]\t// |v|\n\tmadd\tx4, x16, x8, xzr\t// |u|*|f0|\n\tmadd\tx4, x17, x9, x4\t// |v|*|g0|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [c0,#8*4]\n\tstp\tx5, x5, [c0,#8*6]\n\n\tmadd\tx4, x12, x8, xzr\t// |u|*|f1|\n\tmadd\tx4, x13, x9, x4\t// |v|*|g1|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [c0,#8*10]\n\tstp\tx5, x5, [c0,#8*12]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected 
|g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected 
|g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstr\tx24, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\t\t// sign extension\n\tstp\tx24, x24, [c0,#8*4]\n\tstp\tx24, x24, [c0,#8*6]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, 
x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tc0,c0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [c0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected 
|g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\t////////////////////////////////////////// two[!] last iterations\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #47\t\t\t// 31 + 512 % 31\n\t//bl\t__ab_approximation_62_256\t// |a| and |b| are exact,\n\tldr\tx7, [c1,#8*0]\t\t// just load\n\tldr\tx11, [c1,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\tx16, x14\n\tmov\tx17, x15\n\tldr\tc0, [csp]\t\t\t// original out_ptr\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx20, x7, x17\t\t// figure out top-most limb\n\tldp\tx8, x9, [c3,#8*0]\n\tadc\tx23, x23, x25\n\tldp\tx10, x11, [c3,#8*2]\n\n\tadd\tx20, x20, x23\t\t// x20 is 1, 0 or -1\n\tasr\tx19, x20, #63\t\t// sign as mask\n\n\tand\tx23,   x8, x19\t\t// add mod<<256 conditionally\n\tand\tx24,   x9, x19\n\tadds\tx4, x4, x23\n\tand\tx25,   x10, x19\n\tadcs\tx5, x5, x24\n\tand\tx26,   x11, x19\n\tadcs\tx6, x6, x25\n\tadcs\tx7, x22,   x26\n\tadc\tx20, x20, xzr\t\t// x20 is 1, 0 or -1\n\n\tneg\tx19, x20\n\torr\tx20, x20, x19\t\t// excess bit or sign as mask\n\tasr\tx19, x19, #63\t\t// excess bit as mask\n\n\tand\tx8, x8, x20\t\t// mask |mod|\n\tand\tx9, x9, x20\n\tand\tx10, x10, x20\n\tand\tx11, x11, x20\n\n\teor\tx8, x8, x19\t\t// conditionally negate |mod|\n\teor\tx9, x9, x19\n\tadds\tx8, x8, x19, lsr#63\n\teor\tx10, x10, x19\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x19\n\tadcs\tx10, x10, xzr\n\tadc\tx11, x11, xzr\n\n\tadds\tx4, x4, x8\t// final adjustment for |mod|<<256\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [c0,#8*4]\n\tadc\tx7, x7, x11\n\tstp\tx6, x7, [c0,#8*6]\n\n\tadd\tcsp, csp, #1040\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldr\tc29, 
[csp],#10*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_inverse_mod_256,.-ct_inverse_mod_256\n\n////////////////////////////////////////////////////////////////////////\n.type\t__smul_256x63, %function\n.align\t5\n__smul_256x63:\n\tldp\tx4, x5, [c1,#8*0+64]\t// load |u| (or |v|)\n\tasr\tx14, x16, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx6, x7, [c1,#8*2+64]\n\teor\tx16, x16, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx22, [c1,#8*4+64]\n\n\teor\tx4, x4, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx16, x16, x14\n\teor\tx5, x5, x14\n\tadds\tx4, x4, x14, lsr#63\n\teor\tx6, x6, x14\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\teor\tx22, x22, x14\n\tumulh\tx19, x4, x16\n\tadcs\tx7, x7, xzr\n\tumulh\tx20, x5, x16\n\tadcs\tx22, x22, xzr\n\tumulh\tx21, x6, x16\n\tmul\tx4, x4, x16\n\tcmp\tx16, #0\n\tmul\tx5, x5, x16\n\tcsel\tx22, x22, xzr, ne\n\tmul\tx6, x6, x16\n\tadds\tx5, x5, x19\n\tmul\tx24, x7, x16\n\tadcs\tx6, x6, x20\n\tadcs\tx24, x24, x21\n\tadc\tx26, xzr, xzr\n\tldp\tx8, x9, [c1,#8*0+112]\t// load |u| (or |v|)\n\tasr\tx14, x17, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx10, x11, [c1,#8*2+112]\n\teor\tx17, x17, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx23, [c1,#8*4+112]\n\n\teor\tx8, x8, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx17, x17, x14\n\teor\tx9, x9, x14\n\tadds\tx8, x8, x14, lsr#63\n\teor\tx10, x10, x14\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x14\n\tadcs\tx10, x10, xzr\n\teor\tx23, x23, x14\n\tumulh\tx19, x8, x17\n\tadcs\tx11, x11, xzr\n\tumulh\tx20, x9, x17\n\tadcs\tx23, x23, xzr\n\tumulh\tx21, x10, x17\n\tadc\tx15, xzr, xzr\t\t// used in __smul_512x63_tail\n\tmul\tx8, x8, x17\n\tcmp\tx17, #0\n\tmul\tx9, x9, x17\n\tcsel\tx23, x23, xzr, ne\n\tmul\tx10, x10, x17\n\tadds\tx9, x9, x19\n\tmul\tx25, x11, x17\n\tadcs\tx10, x10, x20\n\tadcs\tx25, x25, x21\n\tadc\tx26, x26, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [c0,#8*0]\n\tadcs\tx24,   
x24,   x25\n\tstp\tx6, x24, [c0,#8*2]\n\n\tret\n.size\t__smul_256x63,.-__smul_256x63\n\n.type\t__smul_512x63_tail, %function\n.align\t5\n__smul_512x63_tail:\n\tumulh\tx24, x7, x16\n\tldr\tx5, [c1,#8*19]\t// load rest of |v|\n\tadc\tx26, x26, xzr\n\tldp\tx6, x7, [c1,#8*20]\n\tand\tx22, x22, x16\n\n\tumulh\tx11, x11, x17\t// resume |v|*|g1| chain\n\n\tsub\tx24, x24, x22\t// tie up |u|*|f1| chain\n\tasr\tx25, x24, #63\n\n\teor\tx5, x5, x14\t// conditionally negate rest of |v|\n\teor\tx6, x6, x14\n\tadds\tx5, x5, x15\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\tumulh\tx19, x23,   x17\n\tadc\tx7, x7, xzr\n\tumulh\tx20, x5, x17\n\tadd\tx11, x11, x26\n\tumulh\tx21, x6, x17\n\n\tmul\tx4, x23,   x17\n\tmul\tx5, x5, x17\n\tadds\tx4, x4, x11\n\tmul\tx6, x6, x17\n\tadcs\tx5, x5, x19\n\tmul\tx22,   x7, x17\n\tadcs\tx6, x6, x20\n\tadcs\tx22,   x22,   x21\n\tadc\tx23, xzr, xzr\t\t// used in the final step\n\n\tadds\tx4, x4, x24\n\tadcs\tx5, x5, x25\n\tadcs\tx6, x6, x25\n\tstp\tx4, x5, [c0,#8*4]\n\tadcs\tx22,   x22,   x25\t// carry is used in the final step\n\tstp\tx6, x22,   [c0,#8*6]\n\n\tret\n.size\t__smul_512x63_tail,.-__smul_512x63_tail\n\n.type\t__smul_256_n_shift_by_31, %function\n.align\t5\n__smul_256_n_shift_by_31:\n\tldp\tx4, x5, [c1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx24, x12, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx6, x7, [c1,#8*2+0]\n\teor\tx25, x12, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx4, x4, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx5, x5, x24\n\tadds\tx4, x4, x24, lsr#63\n\teor\tx6, x6, x24\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x24\n\tumulh\tx19, x4, x25\n\tadcs\tx6, x6, xzr\n\tumulh\tx20, x5, x25\n\tadc\tx7, x7, xzr\n\tumulh\tx21, x6, x25\n\tand\tx24, x24, x25\n\tumulh\tx22, x7, x25\n\tneg\tx24, x24\n\n\tmul\tx4, x4, x25\n\tmul\tx5, x5, x25\n\tmul\tx6, x6, x25\n\tadds\tx5, x5, x19\n\tmul\tx7, x7, x25\n\tadcs\tx6, x6, x20\n\tadcs\tx7, x7, x21\n\tadc\tx22, x22, x24\n\tldp\tx8, x9, [c1,#8*0+32]\t// load |a| 
(or |b|)\n\tasr\tx24, x13, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx10, x11, [c1,#8*2+32]\n\teor\tx25, x13, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx8, x8, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx9, x9, x24\n\tadds\tx8, x8, x24, lsr#63\n\teor\tx10, x10, x24\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x24\n\tumulh\tx19, x8, x25\n\tadcs\tx10, x10, xzr\n\tumulh\tx20, x9, x25\n\tadc\tx11, x11, xzr\n\tumulh\tx21, x10, x25\n\tand\tx24, x24, x25\n\tumulh\tx23, x11, x25\n\tneg\tx24, x24\n\n\tmul\tx8, x8, x25\n\tmul\tx9, x9, x25\n\tmul\tx10, x10, x25\n\tadds\tx9, x9, x19\n\tmul\tx11, x11, x25\n\tadcs\tx10, x10, x20\n\tadcs\tx11, x11, x21\n\tadc\tx23, x23, x24\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tadcs\tx7, x7, x11\n\tadc\tx8, x22,   x23\n\n\textr\tx4, x5, x4, #31\n\textr\tx5, x6, x5, #31\n\textr\tx6, x7, x6, #31\n\tasr\tx23, x8, #63\t// result's sign as mask\n\textr\tx7, x8, x7, #31\n\n\teor\tx4, x4, x23\t// ensure the result is positive\n\teor\tx5, x5, x23\n\tadds\tx4, x4, x23, lsr#63\n\teor\tx6, x6, x23\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x23\n\tadcs\tx6, x6, xzr\n\tstp\tx4, x5, [c0,#8*0]\n\tadc\tx7, x7, xzr\n\tstp\tx6, x7, [c0,#8*2]\n\n\teor\tx12, x12, x23\t\t// adjust |f/g| accordingly\n\teor\tx13, x13, x23\n\tsub\tx12, x12, x23\n\tsub\tx13, x13, x23\n\n\tret\n.size\t__smul_256_n_shift_by_31,.-__smul_256_n_shift_by_31\n.type\t__ab_approximation_31_256, %function\n.align\t4\n__ab_approximation_31_256:\n\tldp\tx6, x7, [c1,#8*2]\n\tldp\tx10, x11, [c1,#8*6]\n\tldp\tx4, x5, [c1,#8*0]\n\tldp\tx8, x9, [c1,#8*4]\n\n.Lab_approximation_31_256_loaded:\n\torr\tx19, x7, x11\t// check top-most limbs, ...\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x5, ne\n\torr\tx19, x7, x11\t// and ones before top-most, ...\n\tcsel\tx10, x10, x9, ne\n\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x4, ne\n\torr\tx19, x7, x11\t// and one more, 
...\n\tcsel\tx10, x10, x8, ne\n\n\tclz\tx19, x19\n\tcmp\tx19, #64\n\tcsel\tx19, x19, xzr, ne\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tneg\tx20, x19\n\n\tlslv\tx7, x7, x19\t// align high limbs to the left\n\tlslv\tx11, x11, x19\n\tlsrv\tx6, x6, x20\n\tlsrv\tx10, x10, x20\n\tand\tx6, x6, x20, asr#6\n\tand\tx10, x10, x20, asr#6\n\torr\tx7, x7, x6\n\torr\tx11, x11, x10\n\n\tbfxil\tx7, x4, #0, #31\n\tbfxil\tx11, x8, #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n.size\t__ab_approximation_31_256,.-__ab_approximation_31_256\n\n.type\t__inner_loop_31_256, %function\n.align\t4\n__inner_loop_31_256:\n\tmov\tx2, #31\n\tmov\tx13, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx15, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx23,#0x7FFFFFFF7FFFFFFF\n\n.Loop_31_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x15\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx15, x15, x13,    hs\t// exchange |fg0| and |fg1|\n\tcsel\tx13, x13, x19,   hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x15, x22\n\tand\tx20, x23, x22\n\tsub\tx13, x13, x19\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx15, x15, x15\t// |f1|<<=1\n\tadd\tx13, x13, x20\n\tsub\tx15, x15, x23\n\tcbnz\tx2, .Loop_31_256\n\n\tmov\tx23, #0x7FFFFFFF\n\tubfx\tx12, x13, #0, #32\n\tubfx\tx13, x13, #32, #32\n\tubfx\tx14, x15, #0, #32\n\tubfx\tx15, x15, #32, #32\n\tsub\tx12, x12, x23\t\t// remove bias\n\tsub\tx13, x13, x23\n\tsub\tx14, x14, x23\n\tsub\tx15, x15, x23\n\n\tret\n.size\t__inner_loop_31_256,.-__inner_loop_31_256\n\n.type\t__inner_loop_62_256, %function\n.align\t4\n__inner_loop_62_256:\n\tmov\tx12, #1\t\t// |f0|=1\n\tmov\tx13, #0\t\t// |g0|=0\n\tmov\tx14, #0\t\t// |f1|=0\n\tmov\tx15, #1\t\t// |g1|=1\n\n.Loop_62_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is 
odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x12\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tmov\tx20, x13\n\tcsel\tx12, x12, x14,       hs\t// exchange |f0| and |f1|\n\tcsel\tx14, x14, x19,     hs\n\tcsel\tx13, x13, x15,       hs\t// exchange |g0| and |g1|\n\tcsel\tx15, x15, x20,     hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x14, x22\n\tand\tx20, x15, x22\n\tadd\tx14, x14, x14\t\t// |f1|<<=1\n\tadd\tx15, x15, x15\t\t// |g1|<<=1\n\tsub\tx12, x12, x19\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx13, x13, x20\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62_256\n\n\tret\n.size\t__inner_loop_62_256,.-__inner_loop_62_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/ct_inverse_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_inverse_mod_384\n.hidden\tct_inverse_mod_384\n.type\tct_inverse_mod_384, %function\n.align\t5\nct_inverse_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29, c30, [csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27, c28, [csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #1056\n\n\tldp\tx22,   x4, [c1,#8*0]\n\tldp\tx5, x6, [c1,#8*2]\n\tldp\tx7, x8, [c1,#8*4]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tc1,csp,#32+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #32+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstp\tc0, c3, [csp]\t\t// offload out_ptr, nx_ptr\n\n\tldp\tx9, x10, [c2,#8*0]\n\tldp\tx11, x12, [c2,#8*2]\n\tldp\tx13, x14, [c2,#8*4]\n\n\tstp\tx22,   x4, [c1,#8*0]\t// copy input to |a|\n\tstp\tx5, x6, [c1,#8*2]\n\tstp\tx7, x8, [c1,#8*4]\n\tstp\tx9, x10, [c1,#8*6]\t// copy modulus to |b|\n\tstp\tx11, x12, [c1,#8*8]\n\tstp\tx13, x14, [c1,#8*10]\n\n\t////////////////////////////////////////// first iteration\n\tmov\tx2, #62\n\tbl\t.Lab_approximation_62_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15,[c0,#8*12]\t\t// initialize |u| with |f0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15, [c0,#8*14]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\tx7, [c1,#8*12]\t// |u|\n\tldr\tx8, [c1,#8*20]\t// |v|\n\tmul\tx3, x20, x7\t\t// |u|*|f0|\n\tsmulh\tx4, x20, x7\n\tmul\tx5, x21, x8\t\t// |v|*|g0|\n\tsmulh\tx6, x21, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [c0,#8*6]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [c0,#8*8]\n\tstp\tx5, x5, [c0,#8*10]\n\n\tmul\tx3, x15, x7\t\t// |u|*|f1|\n\tsmulh\tx4, x15, x7\n\tmul\tx5, x16, x8\t\t// |v|*|g1|\n\tsmulh\tx6, x16, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [c0,#8*14]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [c0,#8*16]\n\tstp\tx5, x5, [c0,#8*18]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstr\tx27, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\t\t// sign extension\n\tstp\tx27, x27, [c0,#8*6]\n\tstp\tx27, x27, [c0,#8*8]\n\tstp\tx27, x27, [c0,#8*10]\n\teor\tx1, x1, #256\t\t// 
flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// 
|g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tc0,c0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\t////////////////////////////////////////// iteration before last\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\t//bl\t__ab_approximation_62\t\t// 
|a| and |b| are exact,\n\tldp\tx3, x8, [c1,#8*0]\t// just load\n\tldp\tx9, x14, [c1,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tstr\tx3, [c0,#8*0]\n\tstr\tx9, [c0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f0|\n\tmov\tx21, x16\t\t\t// exact |g0|\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tc0,c0,#8*12\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [c0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f1|\n\tmov\tx21, x16\t\t\t// exact |g1|\n\tadd\tc0,c0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\t////////////////////////////////////////// last iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #24\t\t\t// 768 % 62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldr\tx3, [c1,#8*0]\t\t// just load\n\teor\tx8, x8, x8\n\tldr\tx9, [c1,#8*6]\n\teor\tx14, x14, x14\n\tbl\t__inner_loop_62\n\n\tmov\tx20, x17\n\tmov\tx21, x19\n\tldp\tc0, c15, [csp]\t\t\t// original out_ptr and n_ptr\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx23, x8, x21\t\t// figure out top-most limb\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [c15,#8*0]\t// load |mod|\n\tadd\tx23, x23, x26\t\t// x23 is 1, 0 or -1\n\tldp\tx11, x12, [c15,#8*2]\n\tasr\tx22, x23, #63\t\t// sign as mask\n\tldp\tx13, x14, [c15,#8*4]\n\n\tand\tx26,   x9, x22\t\t// add mod<<384 conditionally\n\tand\tx27,   x10, x22\n\tadds\tx3, x3, x26\n\tand\tx28,   x11, x22\n\tadcs\tx4, x4, x27\n\tand\tx2,   x12, x22\n\tadcs\tx5, x5, x28\n\tand\tx26,   x13, x22\n\tadcs\tx6, x6, x2\n\tand\tx27,   x14, x22\n\tadcs\tx7, x7, x26\n\tadcs\tx8, x25,   x27\n\tadc\tx23, x23, xzr\t\t// x23 is 1, 0 or -1\n\n\tneg\tx22, x23\n\torr\tx23, x23, x22\t\t// excess bit or sign as mask\n\tasr\tx22, x22, #63\t\t// excess bit as mask\n\n\tand\tx9, x9, x23\t\t// mask |mod|\n\tand\tx10, x10, x23\n\tand\tx11, 
x11, x23\n\tand\tx12, x12, x23\n\tand\tx13, x13, x23\n\tand\tx14, x14, x23\n\n\teor\tx9,  x9, x22\t// conditionally negate |mod|\n\teor\tx10,  x10, x22\n\tadds\tx9,  x9, x22, lsr#63\n\teor\tx11,  x11, x22\n\tadcs\tx10,  x10, xzr\n\teor\tx12,  x12, x22\n\tadcs\tx11,  x11, xzr\n\teor\tx13, x13, x22\n\tadcs\tx12,  x12, xzr\n\teor\tx14, x14, x22\n\tadcs\tx13, x13, xzr\n\tadc\tx14, x14, xzr\n\n\tadds\tx3, x3, x9\t// final adjustment for |mod|<<384\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [c0,#8*6]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [c0,#8*8]\n\tadc\tx8, x8, x14\n\tstp\tx7, x8, [c0,#8*10]\n\n\tadd\tcsp, csp, #1056\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27, c28, [c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29, [csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_inverse_mod_384,.-ct_inverse_mod_384\n\n////////////////////////////////////////////////////////////////////////\n// see corresponding commentary in ctx_inverse_mod_384-x86_64...\n.type\t__smul_384x63, %function\n.align\t5\n__smul_384x63:\n\tldp\tx3, x4, [c1,#8*0+96]\t// load |u| (or |v|)\n\tasr\tx17, x20, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx5, x6, [c1,#8*2+96]\n\teor\tx20, x20, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx7, x8, [c1,#8*4+96]\n\n\teor\tx3, x3, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx25, [c1,#8*6+96]\n\tsub\tx20, x20, x17\n\teor\tx4, x4, x17\n\tadds\tx3, x3, x17, lsr#63\n\teor\tx5, x5, x17\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x17\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x3, x20\n\teor\tx8, x8, x17\n\tumulh\tx23, x4, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x5, x20\n\teor\tx25, x25, x17\n\tmul\tx3, x3, x20\n\tadcs\tx8, x8, xzr\n\tmul\tx4, x4, x20\n\tadcs\tx25, x25, xzr\n\tcmp\tx20, #0\n\tmul\tx5, x5, x20\n\tcsel\tx25, x25, xzr, 
ne\n\tadds\tx4, x4, x22\n\tumulh\tx22, x6, x20\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x7, x20\n\tmul\tx6, x6, x20\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x24\n\tmul\tx27,x8, x20\n\tadcs\tx7, x7, x22\n\tadcs\tx27,x27,x23\n\tadc\tx2, xzr, xzr\n\tldp\tx9, x10, [c1,#8*0+160]\t// load |u| (or |v|)\n\tasr\tx17, x21, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx11, x12, [c1,#8*2+160]\n\teor\tx21, x21, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx13, x14, [c1,#8*4+160]\n\n\teor\tx9, x9, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx26, [c1,#8*6+160]\n\tsub\tx21, x21, x17\n\teor\tx10, x10, x17\n\tadds\tx9, x9, x17, lsr#63\n\teor\tx11, x11, x17\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x17\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x17\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x9, x21\n\teor\tx14, x14, x17\n\tumulh\tx23, x10, x21\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x11, x21\n\teor\tx26, x26, x17\n\tmul\tx9, x9, x21\n\tadcs\tx14, x14, xzr\n\tmul\tx10, x10, x21\n\tadcs\tx26, x26, xzr\n\tadc\tx19, xzr, xzr\t\t// used in __smul_768x63_tail\n\tcmp\tx21, #0\n\tmul\tx11, x11, x21\n\tcsel\tx26, x26, xzr, ne\n\tadds\tx10, x10, x22\n\tumulh\tx22, x12, x21\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x13, x21\n\tmul\tx12, x12, x21\n\tmul\tx13, x13, x21\n\tadcs\tx12, x12, x24\n\tmul\tx28,x14, x21\n\tadcs\tx13, x13, x22\n\tadcs\tx28,x28,x23\n\tadc\tx2, x2, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [c0,#8*0]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [c0,#8*2]\n\tadcs\tx27,   x27,   x28\n\tstp\tx7, x27,   [c0,#8*4]\n\n\tret\n.size\t__smul_384x63,.-__smul_384x63\n\n.type\t__smul_768x63_tail, %function\n.align\t5\n__smul_768x63_tail:\n\tumulh\tx27, x8, x20\n\tldr\tx4, [c1,#8*27]// load rest of |v|\n\tadc\tx2, x2, xzr\n\tldp\tx5, x6, [c1,#8*28]\n\tand\tx25, x25, x20\n\tldp\tx7, x8, [c1,#8*30]\n\tsub\tx27, x27, x25\t// tie up |u|*|f1| chain\n\n\tumulh\tx14, x14, x21\t// resume |v|*|g1| chain\n\teor\tx4, x4, x17\t// conditionally 
negate rest of |v|\n\teor\tx5, x5, x17\n\teor\tx6, x6, x17\n\tadds\tx4, x4, x19\n\teor\tx7, x7, x17\n\tadcs\tx5, x5, xzr\n\teor\tx8, x8, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x26,   x21\n\tadcs\tx7, x7, xzr\n\tumulh\tx23, x4, x21\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x21\n\tadd\tx14, x14, x2\n\tumulh\tx25, x6, x21\n\tasr\tx28, x27, #63\n\tumulh\tx2, x7, x21\n\tmul\tx3, x26,   x21\n\tmul\tx4, x4, x21\n\tmul\tx5, x5, x21\n\tadds\tx3, x3, x14\n\tmul\tx6, x6, x21\n\tadcs\tx4, x4, x22\n\tmul\tx7, x7, x21\n\tadcs\tx5, x5, x23\n\tmul\tx22,   x8, x21\n\tadcs\tx6, x6, x24\n\tadcs\tx7, x7, x25\n\tadcs\tx25,   x22, x2\n\tadc\tx26, xzr, xzr\t\t// used in the final step\n\n\tadds\tx3, x3, x27\n\tadcs\tx4, x4, x28\n\tadcs\tx5, x5, x28\n\tadcs\tx6, x6, x28\n\tstp\tx3, x4, [c0,#8*6]\n\tadcs\tx7, x7, x28\n\tstp\tx5, x6, [c0,#8*8]\n\tadcs\tx25,   x25,   x28\t// carry is used in the final step\n\tstp\tx7, x25,   [c0,#8*10]\n\n\tret\n.size\t__smul_768x63_tail,.-__smul_768x63_tail\n\n.type\t__smul_384_n_shift_by_62, %function\n.align\t5\n__smul_384_n_shift_by_62:\n\tldp\tx3, x4, [c1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx28, x15, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx5, x6, [c1,#8*2+0]\n\teor\tx2, x15, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx7, x8, [c1,#8*4+0]\n\n\teor\tx3, x3, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tumulh\tx22, x3, x2\n\tadcs\tx6, x6, xzr\n\tumulh\tx23, x4, x2\n\teor\tx8, x8, x28\n\tmul\tx3, x3, x2\n\tadcs\tx7, x7, xzr\n\tmul\tx4, x4, x2\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x6, x2\n\tadds\tx4, x4, x22\n\tmul\tx5, x5, x2\n\tumulh\tx22, x7, x2\n\tneg\tx28, x28\n\tmul\tx6, x6, x2\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x8, x2\n\tmul\tx7, x7, x2\n\tadcs\tx6, x6, x24\n\tmul\tx8, x8, x2\n\tadcs\tx7, x7, x25\n\tadcs\tx8, x8, 
x22\n\tadc\tx27, x23, x28\n\tldp\tx9, x10, [c1,#8*0+48]\t// load |a| (or |b|)\n\tasr\tx28, x16, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx11, x12, [c1,#8*2+48]\n\teor\tx2, x16, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx13, x14, [c1,#8*4+48]\n\n\teor\tx9, x9, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx10, x10, x28\n\tadds\tx9, x9, x28, lsr#63\n\teor\tx11, x11, x28\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x28\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x28\n\tumulh\tx22, x9, x2\n\tadcs\tx12, x12, xzr\n\tumulh\tx23, x10, x2\n\teor\tx14, x14, x28\n\tmul\tx9, x9, x2\n\tadcs\tx13, x13, xzr\n\tmul\tx10, x10, x2\n\tadc\tx14, x14, xzr\n\n\tumulh\tx24, x11, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x12, x2\n\tadds\tx10, x10, x22\n\tmul\tx11, x11, x2\n\tumulh\tx22, x13, x2\n\tneg\tx28, x28\n\tmul\tx12, x12, x2\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x14, x2\n\tmul\tx13, x13, x2\n\tadcs\tx12, x12, x24\n\tmul\tx14, x14, x2\n\tadcs\tx13, x13, x25\n\tadcs\tx14, x14, x22\n\tadc\tx28, x23, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x27,   x28\n\n\textr\tx3, x4, x3, #62\n\textr\tx4, x5, x4, #62\n\textr\tx5, x6, x5, #62\n\tasr\tx28, x9, #63\n\textr\tx6, x7, x6, #62\n\textr\tx7, x8, x7, #62\n\textr\tx8, x9, x8, #62\n\n\teor\tx3, x3, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x28\n\tstp\tx3, x4, [c0,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [c0,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [c0,#8*4]\n\n\teor\tx15, x15, x28\n\teor\tx16, x16, x28\n\tsub\tx15, x15, x28\n\tsub\tx16, x16, x28\n\n\tret\n.size\t__smul_384_n_shift_by_62,.-__smul_384_n_shift_by_62\n.type\t__ab_approximation_62, %function\n.align\t4\n__ab_approximation_62:\n\tldp\tx7, x8, [c1,#8*4]\n\tldp\tx13, x14, [c1,#8*10]\n\tldp\tx5, x6, 
[c1,#8*2]\n\tldp\tx11, x12, [c1,#8*8]\n\n.Lab_approximation_62_loaded:\n\torr\tx22, x8, x14\t// check top-most limbs, ...\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx22, x8, x14\t// ... ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tldp\tx3, x4, [c1,#8*0]\n\tldp\tx9, x10, [c1,#8*6]\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx22, x8, x14\t// ... and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx22, x8, x14\n\tcsel\tx13, x13, x10, ne\n\n\tclz\tx22, x22\n\tcmp\tx22, #64\n\tcsel\tx22, x22, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx23, x22\n\n\tlslv\tx8, x8, x22\t// align high limbs to the left\n\tlslv\tx14, x14, x22\n\tlsrv\tx7, x7, x23\n\tlsrv\tx13, x13, x23\n\tand\tx7, x7, x23, asr#6\n\tand\tx13, x13, x23, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tb\t__inner_loop_62\n\tret\n.size\t__ab_approximation_62,.-__ab_approximation_62\n.type\t__inner_loop_62, %function\n.align\t4\n__inner_loop_62:\n\tmov\tx15, #1\t\t// |f0|=1\n\tmov\tx16, #0\t\t// |g0|=0\n\tmov\tx17, #0\t\t// |f1|=0\n\tmov\tx19, #1\t\t// |g1|=1\n\n.Loop_62:\n\tsbfx\tx28, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tsubs\tx24, x9, x3\t// |b_|-|a_|\n\tand\tx22, x9, x28\n\tsbc\tx25, x14, x8\n\tand\tx23, x14, x28\n\tsubs\tx26, x3, x22\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx22, x15\n\tsbcs\tx27, x8, x23\n\tmov\tx23, x16\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx14, x14, x8, hs\n\tcsel\tx3, x26, x24, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx8, x27, x25, hs\n\tcsel\tx15, x15, x17,       hs\t// exchange |f0| and |f1|\n\tcsel\tx17, x17, x22,     hs\n\tcsel\tx16, x16, x19,       hs\t// exchange |g0| and |g1|\n\tcsel\tx19, x19, x23,     hs\n\textr\tx3, x8, x3, #1\n\tlsr\tx8, x8, 
#1\n\tand\tx22, x17, x28\n\tand\tx23, x19, x28\n\tadd\tx17, x17, x17\t\t// |f1|<<=1\n\tadd\tx19, x19, x19\t\t// |g1|<<=1\n\tsub\tx15, x15, x22\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx16, x16, x23\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62\n\n\tret\n.size\t__inner_loop_62,.-__inner_loop_62\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/ct_is_square_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_is_square_mod_384\n.hidden\tct_is_square_mod_384\n.type\tct_is_square_mod_384, %function\n.align\t5\nct_is_square_mod_384:\n\thint\t#PACI_HINT\n\tstp\tc29, c30, [csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27, c28, [csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #512\n\n\tldp\tx3, x4, [c0,#8*0]\t\t// load input\n\tldp\tx5, x6, [c0,#8*2]\n\tldp\tx7, x8, [c0,#8*4]\n\n\tadd\tx0, sp, #255\t// find closest 256-byte-aligned spot\n\tand\tx0, x0, #-256\t// in the frame...\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\n\tldp\tx9, x10, [c1,#8*0]\t\t// load modulus\n\tldp\tx11, x12, [c1,#8*2]\n\tldp\tx13, x14, [c1,#8*4]\n\n\tstp\tx3, x4, [c0,#8*6]\t// copy input to |a|\n\tstp\tx5, x6, [c0,#8*8]\n\tstp\tx7, x8, [c0,#8*10]\n\tstp\tx9, x10, [c0,#8*0]\t// copy modulus to |b|\n\tstp\tx11, x12, [c0,#8*2]\n\tstp\tx13, x14, [c0,#8*4]\n\n\teor\tx2, x2, x2\t\t\t// init the .Legendre symbol\n\tmov\tx15, #24\t\t\t// 24 is 768/30-1\n\tb\t.Loop_is_square\n\n.align\t4\n.Loop_is_square:\n\tbl\t__ab_approximation_30\n\tsub\tx15, x15, #1\n\n\teor\tx1, x0, #128\t\t// pointer to dst |b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,csp,x1\n#endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\tx19, x16\t\t\t// |f0|\n\tmov\tx20, x17\t\t\t// |g0|\n\tadd\tc1,c1,#8*6\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\tx9, x10, [c1,#-8*6]\n\teor\tx0, x0, #128\t\t// flip-flop src |a|b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\tand\tx27, x27, x9\t\t// if |a| was negative,\n\tadd\tx2, x2, x27, lsr#1\t\t// adjust |L|\n\n\tcbnz\tx15, 
.Loop_is_square\n\n\t////////////////////////////////////////// last iteration\n\t//bl\t__ab_approximation_30\t\t// |a| and |b| are exact,\n\t//ldr\tx8, [x0,#8*6]\t\t// and loaded\n\t//ldr\tx14, [x0,#8*0]\n\tmov\tx15, #48\t\t\t// 48 is 768%30 + 30\n\tbl\t__inner_loop_48\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tand\tx0, x2, #1\n\teor\tx0, x0, #1\n\n\tadd\tcsp, csp, #512\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27, c28, [c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29, [csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_is_square_mod_384,.-ct_is_square_mod_384\n\n.type\t__smul_384_n_shift_by_30, %function\n.align\t5\n__smul_384_n_shift_by_30:\n\tldp\tx3, x4, [c0,#8*0+0]\t// load |b| (or |a|)\n\tasr\tx27, x20, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx5, x6, [c0,#8*2+0]\n\teor\tx20, x20, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx7, x8, [c0,#8*4+0]\n\n\teor\tx3, x3, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx20, x20, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tumulh\tx21, x3, x20\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x4, x20\n\teor\tx8, x8, x27\n\tumulh\tx23, x5, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x6, x20\n\tadc\tx8, x8, xzr\n\n\tumulh\tx25, x7, x20\n\tand\tx28, x20, x27\n\tumulh\tx26, x8, x20\n\tneg\tx28, x28\n\tmul\tx3, x3, x20\n\tmul\tx4, x4, x20\n\tmul\tx5, x5, x20\n\tadds\tx4, x4, x21\n\tmul\tx6, x6, x20\n\tadcs\tx5, x5, x22\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x23\n\tmul\tx8, x8, x20\n\tadcs\tx7, x7, x24\n\tadcs\tx8, x8 ,x25\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [c0,#8*0+48]\t// load |b| (or |a|)\n\tasr\tx27, x19, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx11, x12, [c0,#8*2+48]\n\teor\tx19, x19, x27\t\t// conditionally negate |g1| (or 
|f1|)\n\tldp\tx13, x14, [c0,#8*4+48]\n\n\teor\tx9, x9, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx19, x19, x27\n\teor\tx10, x10, x27\n\tadds\tx9, x9, x27, lsr#63\n\teor\tx11, x11, x27\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x27\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x27\n\tumulh\tx21, x9, x19\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x10, x19\n\teor\tx14, x14, x27\n\tumulh\tx23, x11, x19\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x12, x19\n\tadc\tx14, x14, xzr\n\n\tumulh\tx25, x13, x19\n\tand\tx28, x19, x27\n\tumulh\tx27, x14, x19\n\tneg\tx28, x28\n\tmul\tx9, x9, x19\n\tmul\tx10, x10, x19\n\tmul\tx11, x11, x19\n\tadds\tx10, x10, x21\n\tmul\tx12, x12, x19\n\tadcs\tx11, x11, x22\n\tmul\tx13, x13, x19\n\tadcs\tx12, x12, x23\n\tmul\tx14, x14, x19\n\tadcs\tx13, x13, x24\n\tadcs\tx14, x14 ,x25\n\tadc\tx27, x27, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x26,   x27\n\n\textr\tx3, x4, x3, #30\n\textr\tx4, x5, x4, #30\n\textr\tx5, x6, x5, #30\n\tasr\tx27, x9, #63\n\textr\tx6, x7, x6, #30\n\textr\tx7, x8, x7, #30\n\textr\tx8, x9, x8, #30\n\n\teor\tx3, x3, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x27\n\tstp\tx3, x4, [c1,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [c1,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [c1,#8*4]\n\n\tret\n.size\t__smul_384_n_shift_by_30,.-__smul_384_n_shift_by_30\n.type\t__ab_approximation_30, %function\n.align\t4\n__ab_approximation_30:\n\tldp\tx13, x14, [c0,#8*4]\t// |a| is still in registers\n\tldp\tx11, x12, [c0,#8*2]\n\n\torr\tx21, x8, x14\t// check top-most limbs, ...\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx21, x8, x14\t// ... 
ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx21, x8, x14\t// ... and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx21, x8, x14\t// and one more, ...\n\tcsel\tx13, x13, x10, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x3, ne\n\torr\tx21, x8, x14\n\tcsel\tx13, x13, x9, ne\n\n\tclz\tx21, x21\n\tcmp\tx21, #64\n\tcsel\tx21, x21, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx22, x21\n\n\tlslv\tx8, x8, x21\t// align high limbs to the left\n\tlslv\tx14, x14, x21\n\tlsrv\tx7, x7, x22\n\tlsrv\tx13, x13, x22\n\tand\tx7, x7, x22, asr#6\n\tand\tx13, x13, x22, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tbfxil\tx8, x3, #0, #32\n\tbfxil\tx14, x9, #0, #32\n\n\tb\t__inner_loop_30\n\tret\n.size\t__ab_approximation_30,.-__ab_approximation_30\n\n.type\t__inner_loop_30, %function\n.align\t4\n__inner_loop_30:\n\tmov\tx28, #30\n\tmov\tx17, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx20, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx27,#0x7FFFFFFF7FFFFFFF\n\n.Loop_30:\n\tsbfx\tx24, x8, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x8, x14\n\tsub\tx28, x28, #1\n\tand\tx21, x14, x24\n\n\tsub\tx22, x14, x8\t\t// |b_|-|a_|\n\tsubs\tx23, x8, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\t// L + (a_ & b_) >> 1\n\tmov\tx21, x20\n\tcsel\tx14, x14, x8, hs\t// |b_| = |a_|\n\tcsel\tx8, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx20, x20, x17,  hs\t// exchange |fg0| and |fg1|\n\tcsel\tx17, x17, x21, hs\n\tcsel\tx2,   x2,   x25, hs\n\tlsr\tx8, x8, #1\n\tand\tx21, x20, x24\n\tand\tx22, x27, x24\n\tadd\tx23, x14, #2\n\tsub\tx17, x17, x21\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx20, x20, x20\t// |f1|<<=1\n\tadd\tx2, x2, x23, 
lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\tadd\tx17, x17, x22\n\tsub\tx20, x20, x27\n\n\tcbnz\tx28, .Loop_30\n\n\tmov\tx27, #0x7FFFFFFF\n\tubfx\tx16, x17, #0, #32\n\tubfx\tx17, x17, #32, #32\n\tubfx\tx19, x20, #0, #32\n\tubfx\tx20, x20, #32, #32\n\tsub\tx16, x16, x27\t\t// remove the bias\n\tsub\tx17, x17, x27\n\tsub\tx19, x19, x27\n\tsub\tx20, x20, x27\n\n\tret\n.size\t__inner_loop_30,.-__inner_loop_30\n.type\t__inner_loop_48, %function\n.align\t4\n__inner_loop_48:\n.Loop_48:\n\tsbfx\tx24, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x3, x9\n\tsub\tx15, x15, #1\n\tand\tx21, x9, x24\n\tsub\tx22, x9, x3\t\t// |b_|-|a_|\n\tsubs\tx23, x3, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx3, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx2,   x2,   x25, hs\n\tadd\tx23, x9, #2\n\tlsr\tx3, x3, #1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\n\tcbnz\tx15, .Loop_48\n\n\tret\n.size\t__inner_loop_48,.-__inner_loop_48\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/div3w-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tdiv_3_limbs\n.hidden\tdiv_3_limbs\n.type\tdiv_3_limbs,%function\n.align\t5\ndiv_3_limbs:\n\thint\t#34\n\tldp\tx4,x5,[c0]\t// load R\n\teor\tx0,x0,x0\t// Q = 0\n\tmov\tx3,#64\t\t// loop counter\n\tnop\n\n.Loop:\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tcsel\tx4,x4,x6,lo\t// select between R and R - D\n\textr\tx1,x2,x1,#1\t// D >>= 1\n\tcsel\tx5,x5,x7,lo\n\tlsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\tsub\tx3,x3,#1\n\tcbnz\tx3,.Loop\n\n\tasr\tx3,x0,#63\t// top bit -> mask\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\n\torr\tx0,x0,x3\t// all ones if overflow\n\n\tret\n.size\tdiv_3_limbs,.-div_3_limbs\n.globl\tquot_rem_128\n.hidden\tquot_rem_128\n.type\tquot_rem_128,%function\n.align\t5\nquot_rem_128:\n\thint\t#34\n\tldp\tx3,x4,[c1]\n\n\tmul\tx5,x3,x2\t// divisor[0:1} * quotient\n\tumulh\tx6,x3,x2\n\tmul\tx11,  x4,x2\n\tumulh\tx7,x4,x2\n\n\tldp\tx8,x9,[c0]\t// load 3 limbs of the dividend\n\tldr\tx10,[c0,#16]\n\n\tadds\tx6,x6,x11\n\tadc\tx7,x7,xzr\n\n\tsubs\tx8,x8,x5\t// dividend - divisor * quotient\n\tsbcs\tx9,x9,x6\n\tsbcs\tx10,x10,x7\n\tsbc\tx5,xzr,xzr\t\t// borrow -> mask\n\n\tadd\tx2,x2,x5\t// if borrowed, adjust the quotient ...\n\tand\tx3,x3,x5\n\tand\tx4,x4,x5\n\tadds\tx8,x8,x3\t// ... 
and add divisor\n\tadc\tx9,x9,x4\n\n\tstp\tx8,x9,[c0]\t// save 2 limbs of the remainder\n\tstr\tx2,[c0,#16]\t// and one limb of the quotient\n\n\tmov\tx0,x2\t\t// return adjusted quotient\n\n\tret\n.size\tquot_rem_128,.-quot_rem_128\n\n.globl\tquot_rem_64\n.hidden\tquot_rem_64\n.type\tquot_rem_64,%function\n.align\t5\nquot_rem_64:\n\thint\t#34\n\tldr\tx3,[c1]\n\tldr\tx8,[c0]\t// load 1 limb of the dividend\n\n\tmul\tx5,x3,x2\t// divisor * quotient\n\n\tsub\tx8,x8,x5\t// dividend - divisor * quotient\n\n\tstp\tx8,x2,[c0]\t// save remainder and quotient\n\n\tmov\tx0,x2\t\t// return quotient\n\n\tret\n.size\tquot_rem_64,.-quot_rem_64\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/mul_mont_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tmul_mont_sparse_256\n.hidden\tmul_mont_sparse_256\n.type\tmul_mont_sparse_256,%function\n.align\t5\nmul_mont_sparse_256:\n\thint\t#34\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[c1]\n\tldr\tx9,        [c2]\n\tldp\tx12,x13,[c1,#16]\n\n\tmul\tx19,x10,x9\n\tldp\tx5,x6,[c3]\n\tmul\tx20,x11,x9\n\tldp\tx7,x8,[c3,#16]\n\tmul\tx21,x12,x9\n\tmul\tx22,x13,x9\n\n\tumulh\tx14,x10,x9\n\tumulh\tx15,x11,x9\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tumulh\tx17,x13,x9\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,xzr,    x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[c2,8*1]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[c2,8*2]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,x
zr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[c2,8*3]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tadcs\tx20,x21,x15\n\tadcs\tx21,x22,x16\n\tadcs\tx22,x23,x17\n\tadc\tx23,xzr,xzr\n\n\tsubs\tx14,x19,x5\n\tsbcs\tx15,x20,x6\n\tsbcs\tx16,x21,x7\n\tsbcs\tx17,x22,x8\n\tsbcs\txzr,    
x23,xzr\n\n\tcsel\tx19,x19,x14,lo\n\tcsel\tx20,x20,x15,lo\n\tcsel\tx21,x21,x16,lo\n\tcsel\tx22,x22,x17,lo\n\n\tstp\tx19,x20,[c0]\n\tstp\tx21,x22,[c0,#16]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\tret\n.size\tmul_mont_sparse_256,.-mul_mont_sparse_256\n.globl\tsqr_mont_sparse_256\n.hidden\tsqr_mont_sparse_256\n.type\tsqr_mont_sparse_256,%function\n.align\t5\nsqr_mont_sparse_256:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[c1]\n\tldp\tx7,x8,[c1,#16]\n\tmov\tx4,x3\n\n\t////////////////////////////////////////////////////////////////\n\t//  |  |  |  |  |  |a1*a0|  |\n\t//  |  |  |  |  |a2*a0|  |  |\n\t//  |  |a3*a2|a3*a0|  |  |  |\n\t//  |  |  |  |a2*a1|  |  |  |\n\t//  |  |  |a3*a1|  |  |  |  |\n\t// *|  |  |  |  |  |  |  | 2|\n\t// +|a3*a3|a2*a2|a1*a1|a0*a0|\n\t//  |--+--+--+--+--+--+--+--|\n\t//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10\n\t//\n\t//  \"can't overflow\" below mark carrying into high part of\n\t//  multiplication result, which can't overflow, because it\n\t//  can never be all ones.\n\n\tmul\tx11,x6,x5\t// a[1]*a[0]\n\tumulh\tx15,x6,x5\n\tmul\tx12,x7,x5\t// a[2]*a[0]\n\tumulh\tx16,x7,x5\n\tmul\tx13,x8,x5\t// a[3]*a[0]\n\tumulh\tx19,x8,x5\n\n\tadds\tx12,x12,x15\t// accumulate high parts of multiplication\n\tmul\tx14,x7,x6\t// a[2]*a[1]\n\tumulh\tx15,x7,x6\n\tadcs\tx13,x13,x16\n\tmul\tx16,x8,x6\t// a[3]*a[1]\n\tumulh\tx17,x8,x6\n\tadc\tx19,x19,xzr\t// can't overflow\n\n\tmul\tx20,x8,x7\t// a[3]*a[2]\n\tumulh\tx21,x8,x7\n\n\tadds\tx15,x15,x16\t// accumulate high parts of multiplication\n\tmul\tx10,x5,x5\t// a[0]*a[0]\n\tadc\tx16,x17,xzr\t// can't overflow\n\n\tadds\tx13,x13,x14\t// accumulate low parts of 
multiplication\n\tumulh\tx5,x5,x5\n\tadcs\tx19,x19,x15\n\tmul\tx15,x6,x6\t// a[1]*a[1]\n\tadcs\tx20,x20,x16\n\tumulh\tx6,x6,x6\n\tadc\tx21,x21,xzr\t// can't overflow\n\n\tadds\tx11,x11,x11\t// acc[1-6]*=2\n\tmul\tx16,x7,x7\t// a[2]*a[2]\n\tadcs\tx12,x12,x12\n\tumulh\tx7,x7,x7\n\tadcs\tx13,x13,x13\n\tmul\tx17,x8,x8\t// a[3]*a[3]\n\tadcs\tx19,x19,x19\n\tumulh\tx8,x8,x8\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadc\tx22,xzr,xzr\n\n\tadds\tx11,x11,x5\t// +a[i]*a[i]\n\tadcs\tx12,x12,x15\n\tadcs\tx13,x13,x6\n\tadcs\tx19,x19,x16\n\tadcs\tx20,x20,x7\n\tadcs\tx21,x21,x17\n\tadc\tx22,x22,x8\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadds\tx10,x10,x19\t// accumulate upper half\n\tadcs\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadc\tx19,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x19,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_sparse_256,.-sqr_mont_sparse_256\n.globl\tfrom_mont_256\n.hidden\tfrom_mont_256\n.type\tfrom_mont_256,%function\n.align\t5\nfrom_mont_256:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\n\tldr\tc29,[csp],#2*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tfrom_mont_256,.-from_mont_256\n\n.globl\tredc_mont_256\n.hidden\tredc_mont_256\n.type\tredc_mont_256,%function\n.align\t5\nre
dc_mont_256:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[c1]\n\tldp\tx12,x13,[c1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tx14,x15,[c1,#32]\n\tldp\tx16,x17,[c1,#48]\n\n\tadds\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadcs\tx12,x12,x16\n\tadcs\tx13,x13,x17\n\tadc\tx9,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x9,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[c0]\n\tstp\tx12,x13,[c0,#16]\n\n\tldr\tc29,[csp],#2*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tredc_mont_256,.-redc_mont_256\n\n.type\t__mul_by_1_mont_256,%function\n.align\t5\n__mul_by_1_mont_256:\n\tmul\tx3,x4,x10\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\
n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tadc\tx13,x9,x17\n\n\tret\n.size\t__mul_by_1_mont_256,.-__mul_by_1_mont_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/mul_mont_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_384x384\n.hidden\tadd_mod_384x384\n.type\tadd_mod_384x384,%function\n.align\t5\nadd_mod_384x384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384x384,.-add_mod_384x384\n\n.type\t__add_mod_384x384,%function\n.align\t5\n__add_mod_384x384:\n\tldp\tx11,  x12,  [c1]\n\tldp\tx19,x20,[c2]\n\tldp\tx13,  x14,  [c1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[c2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [c1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#32]\n\tadcs\tx14,x14,x22\n\tstp\tx11,  x12,  [c0]\n\tadcs\tx15,x15,x23\n\tldp\tx11,  x12,  [c1,#48]\n\tadcs\tx16,x16,x24\n\n\tldp\tx19,x20,[c2,#48]\n\tstp\tx13,  x14,  [c0,#16]\n\tldp\tx13,  x14,  [c1,#64]\n\tldp\tx21,x22,[c2,#64]\n\n\tadcs\tx11,x11,x19\n\tstp\tx15,  x16,  [c0,#32]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[c1,#80]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#80]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tstp\tx11,x12,[c0,#48]\n\tcsel\tx15,x15,x23,lo\n\tstp\tx13,x14,[c0,#64]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx15,x16,[c0,#80]\n\n\tret\n.size\t__add_mod_384x384,.-__add_mod_384x384\n\n.globl\tsub_mod_384x384\n.hidden\tsub_mod_384x384\n.type\tsub_mod_384x384,%function\n.align\t5\nsub_mod_384x384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsub_mod_384x384,.-sub_mod_384x384\n\n.type\t__sub_mod_384x384,%function\n.align\t5\n__sub_mod_384x384:\n\tldp\tx11,  x12,  [c1]\n\tldp\tx19,x20,[c2]\n\tldp\tx13,  x14,  [c1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[c2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [c1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#32]\n\tsbcs\tx14,x14,x22\n\tstp\tx11,  x12,  [c0]\n\tsbcs\tx15,x15,x23\n\tldp\tx11,  x12,  [c1,#48]\n\tsbcs\tx16,x16,x24\n\n\tldp\tx19,x20,[c2,#48]\n\tstp\tx13,  x14,  [c0,#16]\n\tldp\tx13,  x14,  [c1,#64]\n\tldp\tx21,x22,[c2,#64]\n\n\tsbcs\tx11,x11,x19\n\tstp\tx15,  x16,  [c0,#32]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[c1,#80]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#80]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[c0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[c0,#64]\n\tstp\tx15,x16,[c0,#80]\n\n\tret\n.size\t__sub_mod_384x384,.-__sub_mod_384x384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\tx11,  x12,  [c1]\n\tldp\tx19,x20,[c2]\n\tldp\tx13,  x14,  [c1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[c2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [c1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#32]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tstp\tx11,x12,[c0]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx13,x14,[c0,#16]\n\tstp\tx15,x16,[c0,#32]\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\tx11,  x12,  [c1]\n\tldp\tx19,x20,[c2]\n\tldp\tx13,  x14,  [c1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[c2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[c1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[c2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[c0]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[c0,#16]\n\tstp\tx15,x16,[c0,#32]\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tmul_mont_384x\n.hidden\tmul_mont_384x\n.type\tmul_mont_384x,%function\n.align\t5\nmul_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#288\t\t// space for 3 768-bit vectors\n\n\tmov\tc26,c0\t\t// save r_ptr\n\tmov\tc27,c1\t\t// save a_ptr\n\tmov\tc28,c2\t\t// save b_ptr\n\n\tadd\tc0,csp,#0\n\tbl\t__mul_384\n\n\tadd\tc1,c1,#48\n\tadd\tc2,c2,#48\n\tadd\tc0,csp,#96\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tsub\tc2,c1,#48\n\tadd\tc0,csp,#240\n\tbl\t__add_mod_384\n\n\tadd\tc1,c28,#0\n\tadd\tc2,c28,#48\n\tadd\tc0,csp,#192\n\tbl\t__add_mod_384\n\n\tadd\tc1,c0,#0\n\tadd\tc2,c0,#48\n\tbl\t__mul_384\t\t// mul_384(t2, a->re+a->im, b->re+b->im)\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tmov\tc1,c0\n\tadd\tc2,csp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tc2,csp,#96\n\tbl\t__sub_mod_384x384\t// t2 = t2-t0-t1\n\n\tadd\tc1,csp,#0\n\tadd\tc2,csp,#96\n\tadd\tc0,csp,#0\n\tbl\t__sub_mod_384x384\t// t0 = 
t0-t1\n\n\tadd\tc1,csp,#0\n\tadd\tc0,c26,#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tadd\tc1,csp,#192\n\tadd\tc0,c0,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#288\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_mont_384x,.-mul_mont_384x\n\n.globl\tsqr_mont_384x\n.hidden\tsqr_mont_384x\n.type\tsqr_mont_384x,%function\n.align\t5\nsqr_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc3,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tcsp,csp,#96\t\t// space for 2 384-bit vectors\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\tldp\tx9,x10,[c2,#32]\n\n\tadd\tc2,c1,#48\n\tadd\tc0,csp,#0\n\tbl\t__add_mod_384\t\t// t0 = a->re + a->im\n\n\tadd\tc0,csp,#48\n\tbl\t__sub_mod_384\t\t// t1 = a->re - a->im\n\n\tldp\tx11,x12,[c1]\n\tldr\tx17,        [c2]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx11,x11,x11\t// add with 
itself\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx19,x11,x19,lo\n\tcsel\tx20,x12,x20,lo\n\tcsel\tx21,x13,x21,lo\n\tldp\tx11,x12,[csp]\n\tcsel\tx22,x14,x22,lo\n\tldr\tx17,        [csp,#48]\n\tcsel\tx23,x15,x23,lo\n\tldp\tx13,x14,[csp,#16]\n\tcsel\tx24,x16,x24,lo\n\tldp\tx15,x16,[csp,#32]\n\n\tstp\tx19,x20,[c2,#48]\n\tstp\tx21,x22,[c2,#64]\n\tstp\tx23,x24,[c2,#80]\n\n\tadd\tc2,csp,#48\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[c2]\n\tstp\tx13,x14,[c2,#16]\n\tstp\tx15,x16,[c2,#32]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_384x,.-sqr_mont_384x\n\n.globl\tmul_mont_384\n.hidden\tmul_mont_384\n.type\tmul_mont_384,%function\n.align\t5\nmul_mont_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc4,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\n\tldp\tx11,x12,[c1]\n\tldr\tx17,        
[c2]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[c2]\n\tstp\tx13,x14,[c2,#16]\n\tstp\tx15,x16,[c2,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_mont_384,.-mul_mont_384\n\n.type\t__mul_mont_384,%function\n.align\t5\n__mul_mont_384:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    x3\n\tmul\tx3,x10,x4\n\tmov\tx17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[c2,8*1]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[c2,8*2]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[c2,8*3]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[c2,8*4]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[c2,8*5]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tldp\tc4,c2,[c29,#12*__SIZEOF_POINTER__]\t// pull r_ptr\n\tadc\tx17,x17,xzr\n\n\tadds\tx19,x20,x26\n\tadcs\tx20,x21,x27\n\tadcs\tx21,x22,x28\n\tadcs\tx22,x23,x0\n\tadcs\tx23,x24,x1\n\tadcs\tx24,x25,x3\n\tadc\tx25,x17,xzr\n\n\tsubs\tx26,x19,x5\n\tsbcs\tx27,x20,x6\n\tsbcs\tx28,x21,x7\n\tsbcs\tx0,x22,x8\n\tsbcs\tx1,x23,x9\n\tsbcs\tx3,x24,x10\n\tsbcs\txzr,    
x25,xzr\n\n\tcsel\tx11,x19,x26,lo\n\tcsel\tx12,x20,x27,lo\n\tcsel\tx13,x21,x28,lo\n\tcsel\tx14,x22,x0,lo\n\tcsel\tx15,x23,x1,lo\n\tcsel\tx16,x24,x3,lo\n\tret\n.size\t__mul_mont_384,.-__mul_mont_384\n\n.globl\tsqr_mont_384\n.hidden\tsqr_mont_384\n.type\tsqr_mont_384,%function\n.align\t5\nsqr_mont_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#96\t\t// space for 768-bit vector\n\tmov\tc4,c3\t\t// adjust for missing b_ptr\n\n\tmov\tc3,c0\t\t// save r_ptr\n\tmov\tc0,csp\n\n\tldp\tx11,x12,[c1]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tbl\t__sqr_384\n\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\tldp\tx9,x10,[c2,#32]\n\n\tmov\tc1,csp\n\tmov\tc0,c3\t\t// restore r_ptr\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_384,.-sqr_mont_384\n\n.globl\tsqr_n_mul_mont_383\n.hidden\tsqr_n_mul_mont_383\n.type\tsqr_n_mul_mont_383,%function\n.align\t5\nsqr_n_mul_mont_383:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc4,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tcsp,csp,#96\t\t// space for 768-bit vector\n\tmov\tc17,c5\t\t\t// save 
b_ptr\n\n\tldp\tx11,x12,[c1]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\tmov\tc0,csp\n.Loop_sqr_383:\n\tbl\t__sqr_384\n\tsub\tx2,x2,#1\t// counter\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tmov\tc1,csp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\tx19,x20,[c1,#48]\n\tldp\tx21,x22,[c1,#64]\n\tldp\tx23,x24,[c1,#80]\n\n\tadds\tx11,x11,x19\t// just accumulate upper half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tcbnz\tx2,.Loop_sqr_383\n\n\tmov\tc2,c17\n\tldr\tx17,[c17]\n\tbl\t__mul_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[c2]\n\tstp\tx13,x14,[c2,#16]\n\tstp\tx15,x16,[c2,#32]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_n_mul_mont_383,.-sqr_n_mul_mont_383\n.type\t__sqr_384,%function\n.align\t5\n__sqr_384:\n\tmul\tx19,x12,x11\n\tmul\tx20,x13,x11\n\tmul\tx21,x14,x11\n\tmul\tx22,x15,x11\n\tmul\tx23,x16,x11\n\n\tumulh\tx6,x12,x11\n\tumulh\tx7,x13,x11\n\tumulh\tx8,x14,x11\n\tumulh\tx9,x15,x11\n\tadds\tx20,x20,x6\n\tumulh\tx10,x16,x11\n\tadcs\tx21,x21,x7\n\tmul\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tmul\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tmul\tx9,x15,x12\n\tadc\tx24,xzr,    x10\n\tmul\tx10,x16,x12\n\n\tadds\tx21,x21,x7\n\tumulh\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x12\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x12\n\tadc\tx25,xzr,xzr\n\n\tmul\tx5,x11,x11\n\tadds\tx22,x22,x7\n\tumulh\tx11,  
x11,x11\n\tadcs\tx23,x23,x8\n\tmul\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tmul\tx9,x15,x13\n\tadc\tx25,x25,x10\n\tmul\tx10,x16,x13\n\n\tadds\tx23,x23,x8\n\tumulh\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tumulh\tx9,x15,x13\n\tadcs\tx25,x25,x10\n\tumulh\tx10,x16,x13\n\tadc\tx26,xzr,xzr\n\n\tmul\tx6,x12,x12\n\tadds\tx24,x24,x8\n\tumulh\tx12,  x12,x12\n\tadcs\tx25,x25,x9\n\tmul\tx9,x15,x14\n\tadc\tx26,x26,x10\n\tmul\tx10,x16,x14\n\n\tadds\tx25,x25,x9\n\tumulh\tx9,x15,x14\n\tadcs\tx26,x26,x10\n\tumulh\tx10,x16,x14\n\tadc\tx27,xzr,xzr\n\tmul\tx7,x13,x13\n\tadds\tx26,x26,x9\n\tumulh\tx13,  x13,x13\n\tadc\tx27,x27,x10\n\tmul\tx8,x14,x14\n\n\tmul\tx10,x16,x15\n\tumulh\tx14,  x14,x14\n\tadds\tx27,x27,x10\n\tumulh\tx10,x16,x15\n\tmul\tx9,x15,x15\n\tadc\tx28,x10,xzr\n\n\tadds\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadcs\tx22,x22,x22\n\tadcs\tx23,x23,x23\n\tadcs\tx24,x24,x24\n\tadcs\tx25,x25,x25\n\tadcs\tx26,x26,x26\n\tumulh\tx15,  x15,x15\n\tadcs\tx27,x27,x27\n\tmul\tx10,x16,x16\n\tadcs\tx28,x28,x28\n\tumulh\tx16,  
x16,x16\n\tadc\tx1,xzr,xzr\n\n\tadds\tx19,x19,x11\n\tadcs\tx20,x20,x6\n\tadcs\tx21,x21,x12\n\tadcs\tx22,x22,x7\n\tadcs\tx23,x23,x13\n\tadcs\tx24,x24,x8\n\tadcs\tx25,x25,x14\n\tstp\tx5,x19,[c0]\n\tadcs\tx26,x26,x9\n\tstp\tx20,x21,[c0,#16]\n\tadcs\tx27,x27,x15\n\tstp\tx22,x23,[c0,#32]\n\tadcs\tx28,x28,x10\n\tstp\tx24,x25,[c0,#48]\n\tadc\tx16,x16,x1\n\tstp\tx26,x27,[c0,#64]\n\tstp\tx28,x16,[c0,#80]\n\n\tret\n.size\t__sqr_384,.-__sqr_384\n.globl\tsqr_384\n.hidden\tsqr_384\n.type\tsqr_384,%function\n.align\t5\nsqr_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[c1]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tbl\t__sqr_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_384,.-sqr_384\n\n.globl\tredc_mont_384\n.hidden\tredc_mont_384\n.type\tredc_mont_384,%function\n.align\t5\nredc_mont_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing 
b_ptr\n\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\tldp\tx9,x10,[c2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tredc_mont_384,.-redc_mont_384\n\n.globl\tfrom_mont_384\n.hidden\tfrom_mont_384\n.type\tfrom_mont_384,%function\n.align\t5\nfrom_mont_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\tldp\tx9,x10,[c2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[c0]\n\tstp\tx13,x14,[c0,#16]\n\tstp\tx15,x16,[c0,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tfrom_mont_384,.-from_mont_384\n\n.type\t__mul_by_1_mont_384,%function\n.align\t5\n__mul_by_1_mont_384:\n\tldp\tx11,x12,[c1]\n\tldp\tx13,x14,[c1,#16]\n\tmul\tx26,x4,x11\n\tldp\tx15,x16,[c1,#32]\n\n\t// 
mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\tret\n.size\t__mul_by_1_mont_384,.-__mul_by_1_mont_384\n\n.type\t__redc_tail_mont_384,%function\n.align\t5\n__redc_tail_mont_384:\n\tldp\tx19,x20,[c1,#48]\n\tldp\tx21,x22,[c1,#64]\n\tldp\tx23,x24,[c1,#80]\n\n\tadds\tx11,x11,x19\t// accumulate upper 
half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[c0]\n\tstp\tx13,x14,[c0,#16]\n\tstp\tx15,x16,[c0,#32]\n\n\tret\n.size\t__redc_tail_mont_384,.-__redc_tail_mont_384\n\n.globl\tmul_384\n.hidden\tmul_384\n.type\tmul_384,%function\n.align\t5\nmul_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_384,.-mul_384\n\n.type\t__mul_384,%function\n.align\t5\n__mul_384:\n\tldp\tx11,x12,[c1]\n\tldr\tx17,        [c2]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\n\tumulh\tx5,x11,x17\n\tumulh\tx6,x12,x17\n\tumulh\tx7,x13,x17\n\tumulh\tx8,x14,x17\n\tumulh\tx9,x15,x17\n\tumulh\tx10,x16,x17\n\tldr\tx17,[c2,8*1]\n\n\tstr\tx19,[c0]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,xzr,    
x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[c2,#8*(1+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[c0,8*1]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[c2,#8*(2+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[c0,8*2]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[c2,#8*(3+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[c0,8*3]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[c2,#8*(4+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[c0,8*4]\n\t
adds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[c0,8*5]\n\tadds\tx19,x20,x5\n\tadcs\tx20,x21,x6\n\tadcs\tx21,x22,x7\n\tadcs\tx22,x23,x8\n\tadcs\tx23,x24,x9\n\tadc\tx24,x25,x10\n\n\tstp\tx19,x20,[c0,#48]\n\tstp\tx21,x22,[c0,#64]\n\tstp\tx23,x24,[c0,#80]\n\n\tret\n.size\t__mul_384,.-__mul_384\n\n.globl\tmul_382x\n.hidden\tmul_382x\n.type\tmul_382x,%function\n.align\t5\nmul_382x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#96\t\t// space for two 384-bit vectors\n\n\tldp\tx11,x12,[c1]\n\tmov\tc26,c0\t\t// save r_ptr\n\tldp\tx19,x20,[c1,#48]\n\tmov\tc27,c1\t\t// save a_ptr\n\tldp\tx13,x14,[c1,#16]\n\tmov\tc28,c2\t\t// save b_ptr\n\tldp\tx21,x22,[c1,#64]\n\tldp\tx15,x16,[c1,#32]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx23,x24,[c1,#80]\n\tadcs\tx6,x12,x20\n\tldp\tx11,x12,[c2]\n\tadcs\tx7,x13,x21\n\tldp\tx19,x20,[c2,#48]\n\tadcs\tx8,x14,x22\n\tldp\tx13,x14,[c2,#16]\n\tadcs\tx9,x15,x23\n\tldp\tx21,x22,[c2,#64]\n\tadc\tx10,x16,x24\n\tldp\tx15,x16,[c2,#32]\n\n\tstp\tx5,x6,[csp]\n\tadds\tx5,x11,x19\t// t1 = b->re + 
b->im\n\tldp\tx23,x24,[c2,#80]\n\tadcs\tx6,x12,x20\n\tstp\tx7,x8,[csp,#16]\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tstp\tx9,x10,[csp,#32]\n\tadcs\tx9,x15,x23\n\tstp\tx5,x6,[csp,#48]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[csp,#64]\n\tstp\tx9,x10,[csp,#80]\n\n\tbl\t__mul_384\t\t// mul_384(ret->re, a->re, b->re)\n\n\tadd\tc1,csp,#0\n\tadd\tc2,csp,#48\n\tadd\tc0,c26,#96\n\tbl\t__mul_384\n\n\tadd\tc1,c27,#48\n\tadd\tc2,c28,#48\n\tadd\tc0,csp,#0\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[c3]\n\tldp\tx7,x8,[c3,#16]\n\tldp\tx9,x10,[c3,#32]\n\n\tadd\tc1,c26,#96\n\tadd\tc2,csp,#0\n\tadd\tc0,c26,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tc2,c26,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tc1,c26,#0\n\tadd\tc2,csp,#0\n\tadd\tc0,c26,#0\n\tbl\t__sub_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_382x,.-mul_382x\n\n.globl\tsqr_382x\n.hidden\tsqr_382x\n.type\tsqr_382x,%function\n.align\t5\nsqr_382x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[c1]\n\tldp\tx19,x20,[c1,#48]\n\tldp\tx13,x14,[c1,#16]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx21,x22,[c1,#64]\n\tadcs\tx6,x12,x20\n\tldp\tx15,x16,[c1,#32]\n\tadcs\tx7,x13,x21\n\tldp\tx23,x24,[c1,#80]\n\tadcs\tx8,x14,x22\n\tstp\tx5,x6,[c0]\n\tadcs\tx9,x15,x23\n\tldp\tx5,x6,[c2]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[c0,#16]\n\n\tsubs\tx11,x11,x19\t// t1 = a->re - 
a->im\n\tldp\tx7,x8,[c2,#16]\n\tsbcs\tx12,x12,x20\n\tstp\tx9,x10,[c0,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx9,x10,[c2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x25\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x25\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x25\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x25\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[c0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[c0,#64]\n\tstp\tx15,x16,[c0,#80]\n\n\tmov\tc4,c1\t\t// save a_ptr\n\tadd\tc1,c0,#0\n\tadd\tc2,c0,#48\n\tbl\t__mul_384\n\n\tadd\tc1,c4,#0\n\tadd\tc2,c4,#48\n\tadd\tc0,c0,#96\n\tbl\t__mul_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[c0]\n\tldp\tx13,x14,[c0,#16]\n\tadds\tx11,x11,x11\t// add with itself\n\tldp\tx15,x16,[c0,#32]\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadcs\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tstp\tx11,x12,[c0]\n\tadcs\tx21,x21,x21\n\tstp\tx13,x14,[c0,#16]\n\tadcs\tx22,x22,x22\n\tstp\tx15,x16,[c0,#32]\n\tadcs\tx23,x23,x23\n\tstp\tx19,x20,[c0,#48]\n\tadc\tx24,x24,x24\n\tstp\tx21,x22,[c0,#64]\n\tstp\tx23,x24,[c0,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_382x,.-sqr_382x\n\n.globl\tsqr_mont_382x\n.hidden\tsqr_mont_382x\n.type\tsqr_mont_382x,%function\n.align\t5\nsqr_mont_382x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc3,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 
wants them there\n\tsub\tcsp,csp,#112\t\t// space for two 384-bit vectors + word\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx11,x12,[c1]\n\tldp\tx13,x14,[c1,#16]\n\tldp\tx15,x16,[c1,#32]\n\n\tldp\tx17,x20,[c1,#48]\n\tldp\tx21,x22,[c1,#64]\n\tldp\tx23,x24,[c1,#80]\n\n\tadds\tx5,x11,x17\t// t0 = a->re + a->im\n\tadcs\tx6,x12,x20\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tadcs\tx9,x15,x23\n\tadc\tx10,x16,x24\n\n\tsubs\tx19,x11,x17\t// t1 = a->re - a->im\n\tsbcs\tx20,x12,x20\n\tsbcs\tx21,x13,x21\n\tsbcs\tx22,x14,x22\n\tsbcs\tx23,x15,x23\n\tsbcs\tx24,x16,x24\n\tsbc\tx25,xzr,xzr\t\t// borrow flag as mask\n\n\tstp\tx5,x6,[csp]\n\tstp\tx7,x8,[csp,#16]\n\tstp\tx9,x10,[csp,#32]\n\tstp\tx19,x20,[csp,#48]\n\tstp\tx21,x22,[csp,#64]\n\tstp\tx23,x24,[csp,#80]\n\tstr\tx25,[csp,#96]\n\n\tldp\tx5,x6,[c2]\n\tldp\tx7,x8,[c2,#16]\n\tldp\tx9,x10,[c2,#32]\n\n\tadd\tc2,c1,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx19,x11,x11\t// add with itself\n\tadcs\tx20,x12,x12\n\tadcs\tx21,x13,x13\n\tadcs\tx22,x14,x14\n\tadcs\tx23,x15,x15\n\tadc\tx24,x16,x16\n\n\tstp\tx19,x20,[c2,#48]\n\tstp\tx21,x22,[c2,#64]\n\tstp\tx23,x24,[c2,#80]\n\n\tldp\tx11,x12,[csp]\n\tldr\tx17,[csp,#48]\n\tldp\tx13,x14,[csp,#16]\n\tldp\tx15,x16,[csp,#32]\n\n\tadd\tc2,csp,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, t0, t1)\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldr\tx25,[csp,#96]\t// account for sign from a->re - 
a->im\n\tldp\tx19,x20,[csp]\n\tldp\tx21,x22,[csp,#16]\n\tldp\tx23,x24,[csp,#32]\n\n\tand\tx19,x19,x25\n\tand\tx20,x20,x25\n\tand\tx21,x21,x25\n\tand\tx22,x22,x25\n\tand\tx23,x23,x25\n\tand\tx24,x24,x25\n\n\tsubs\tx11,x11,x19\n\tsbcs\tx12,x12,x20\n\tsbcs\tx13,x13,x21\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tand\tx21,x7,x25\n\tand\tx22,x8,x25\n\tand\tx23,x9,x25\n\tand\tx24,x10,x25\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tstp\tx11,x12,[c2]\n\tstp\tx13,x14,[c2,#16]\n\tstp\tx15,x16,[c2,#32]\n\n\tadd\tcsp,csp,#112\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_382x,.-sqr_mont_382x\n\n.type\t__mul_mont_383_nonred,%function\n.align\t5\n__mul_mont_383_nonred:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[c2,8*1]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[c2,8*2]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,x
zr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[c2,8*3]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[c2,8*4]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx
27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[c2,8*5]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[c29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\tldp\tc4,c2,[c29,#12*__SIZEOF_POINTER__]\t\t// pull 
r_ptr\n\n\tadds\tx11,x20,x26\n\tadcs\tx12,x21,x27\n\tadcs\tx13,x22,x28\n\tadcs\tx14,x23,x0\n\tadcs\tx15,x24,x1\n\tadcs\tx16,x25,x3\n\n\tret\n.size\t__mul_mont_383_nonred,.-__mul_mont_383_nonred\n\n.globl\tsgn0_pty_mont_384\n.hidden\tsgn0_pty_mont_384\n.type\tsgn0_pty_mont_384,%function\n.align\t5\nsgn0_pty_mont_384:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[c1]\n\tldp\tx7,x8,[c1,#16]\n\tldp\tx9,x10,[c1,#32]\n\tmov\tc1,c0\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsgn0_pty_mont_384,.-sgn0_pty_mont_384\n\n.globl\tsgn0_pty_mont_384x\n.hidden\tsgn0_pty_mont_384x\n.type\tsgn0_pty_mont_384x,%function\n.align\t5\nsgn0_pty_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[c1]\n\tldp\tx7,x8,[c1,#16]\n\tldp\tx9,x10,[c1
,#32]\n\tmov\tc1,c0\n\n\tbl\t__mul_by_1_mont_384\n\tadd\tc1,c1,#48\n\n\tand\tx2,x11,#1\n\torr\tx3,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\torr\tx3,x3,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx2,x2,x17\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\torr\tx1,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\torr\tx1,x1,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t\t// pack sign and parity\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsgn0_pty_mont_384x,.-sgn0_pty_mont_384x\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/cheri/sha256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n//\n// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n//\n// ====================================================================\n// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n// project.\n// ====================================================================\n//\n// sha256_block procedure for ARMv8.\n//\n// This module is stripped of scalar code paths, with rationale that all\n// known processors are NEON-capable.\n//\n// See original module at CRYPTOGAMS for further details.\n\n.comm\t__blst_platform_cap,4\n.text\n\n.align\t6\n.type\t.LK256,%object\n.LK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n.long\t0\t//terminator\n.size\t.LK256,.-.LK256\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.align\t2\n.align\t2\n.globl\tblst_s
ha256_block_armv8\n.hidden\tblst_sha256_block_armv8\n.type\tblst_sha256_block_armv8,%function\n.align\t6\nblst_sha256_block_armv8:\n\thint\t#34\n.Lv8_entry:\n\tstp\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\n\tld1\t{v0.4s,v1.4s},[c0]\n\tadr\tc3,.LK256\n\n.Loop_hw:\n\tld1\t{v4.16b,v5.16b,v6.16b,v7.16b},[c1],#64\n\tsub\tx2,x2,#1\n\tld1\t{v16.4s},[c3],#16\n\trev32\tv4.16b,v4.16b\n\trev32\tv5.16b,v5.16b\n\trev32\tv6.16b,v6.16b\n\trev32\tv7.16b,v7.16b\n\torr\tv18.16b,v0.16b,v0.16b\t\t// offload\n\torr\tv19.16b,v1.16b,v1.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 
v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 
v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[c3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tld1\t{v16.4s},[c3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tld1\t{v17.4s},[c3]\n\tadd\tv16.4s,v16.4s,v6.4s\n\tsub\tc3,c3,#64*4-16\t// rewind\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tadd\tv17.4s,v17.4s,v7.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tadd\tv0.4s,v0.4s,v18.4s\n\tadd\tv1.4s,v1.4s,v19.4s\n\n\tcbnz\tx2,.Loop_hw\n\n\tst1\t{v0.4s,v1.4s},[c0]\n\n\tldr\tc29,[csp],#2*__SIZEOF_POINTER__\n\tret\n.size\tblst_sha256_block_armv8,.-blst_sha256_block_armv8\n.globl\tblst_sha256_block_data_order\n.hidden\tblst_sha256_block_data_order\n.type\tblst_sha256_block_data_order,%function\n.align\t4\nblst_sha256_block_data_order:\n\thint\t#34\n\tadrp\tc16,__blst_platform_cap\n\tldr\tw16,[c16,#:lo12:__blst_platform_cap]\n\ttst\tw16,#1\n\tb.ne\t.Lv8_entry\n\n\tstp\tc29, c30, [csp, #-2*__SIZEOF_POINTER__]!\n\tmov\tc29, csp\n\tsub\tcsp,csp,#16*4\n\n\tadr\tc16,.LK256\n\tadd\tx2,x1,x2,lsl#6\t// len to point at the end of inp\n\n\tld1\t{v0.16b},[c1], #16\n\tld1\t{v1.16b},[c1], #16\n\tld1\t{v2.16b},[c1], #16\n\tld1\t{v3.16b},[c1], #16\n\tld1\t{v4.4s},[c16], #16\n\tld1\t{v5.4s},[c16], #16\n\tld1\t{v6.4s},[c16], #16\n\tld1\t{v7.4s},[c16], #16\n\trev32\tv0.16b,v0.16b\t\t// yes, even on\n\trev32\tv1.16b,v1.16b\t\t// big-endian\n\trev32\tv2.16b,v2.16b\n\trev32\tv3.16b,v3.16b\n\tmov\tc17,csp\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tv5.4s,v5.4s,v1.4s\n\tadd\tv6.4s,v6.4s,v2.4s\n\tst1\t{v4.4s,v5.4s},[c17], 
#32\n\tadd\tv7.4s,v7.4s,v3.4s\n\tst1\t{v6.4s,v7.4s},[c17]\n\tsub\tc17,c17,#32\n\n\tldp\tw3,w4,[c0]\n\tldp\tw5,w6,[c0,#8]\n\tldp\tw7,w8,[c0,#16]\n\tldp\tw9,w10,[c0,#24]\n\tldr\tw12,[csp,#0]\n\tmov\tw13,wzr\n\teor\tw14,w4,w5\n\tmov\tw15,wzr\n\tb\t.L_00_48\n\n.align\t4\n.L_00_48:\n\text\tv4.16b,v0.16b,v1.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v2.16b,v3.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v3.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv0.4s,v0.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[csp,#4]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv0.4s,v0.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[csp,#8]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv0.4s,v0.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v0.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v0.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v0.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v0.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw
8,w8,w11\n\tsli\tv17.4s,v0.4s,#13\n\tldr\tw12,[csp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[c16], #16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv0.4s,v0.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[csp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[c17], #16\n\text\tv4.16b,v1.16b,v2.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v3.16b,v0.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v0.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv1.4s,v1.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[csp,#20]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv1.4s,v1.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[csp,#24]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w
4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv1.4s,v1.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\tushr\tv18.4s,v1.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v1.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v1.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v1.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v1.4s,#13\n\tldr\tw12,[csp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[c16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv1.4s,v1.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[csp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[c17], 
#16\n\text\tv4.16b,v2.16b,v3.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v0.16b,v1.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v1.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv2.4s,v2.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[csp,#36]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv2.4s,v2.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[csp,#40]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv2.4s,v2.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v2.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v2.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v2.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v2.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tsli\tv17.4s,v2.4s,#13\n\tldr\tw12,[csp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[c16], 
#16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv2.4s,v2.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[csp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[c17], #16\n\text\tv4.16b,v3.16b,v0.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v1.16b,v2.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v2.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv3.4s,v3.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[csp,#52]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv3.4s,v3.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[csp,#56]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv3.4s,v3.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd
\tw5,w5,w13\n\tushr\tv18.4s,v3.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v3.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v3.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v3.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v3.4s,#13\n\tldr\tw12,[csp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[c16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv3.4s,v3.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[c16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[c17], #16\n\tcmp\tw12,#0\t\t\t\t// check for K256 terminator\n\tldr\tw12,[csp,#0]\n\tsub\tc17,c17,#64\n\tbne\t.L_00_48\n\n\tsub\tc16,c16,#256\n\tcmp\tx1,x2\n\tmov\tx17, #-64\n\tcsel\tx17, x17, xzr, 
eq\n\tadd\tc1,c1,x17\n\tmov\tc17,csp\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v0.16b},[c1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[c16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv0.16b,v0.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[csp,#4]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[csp,#8]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[csp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[csp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[c17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v1.16b},[c1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[c16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv1.16b,v1.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[csp,#20]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[csp,#24]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[csp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[csp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[c17], 
#16\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v2.16b},[c1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[c16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv2.16b,v2.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[csp,#36]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[csp,#40]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[csp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[csp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[c17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v3.16b},[c1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[c16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv3.16b,v3.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[csp,#52]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[csp,#56]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[csp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[c17], #16\n\tadd\tw3,w3,w15\t\t\t// h+=Sigma0(a) from the past\n\tldp\tw11,w12,[c0,#0]\n\tadd\tw3,w3,w13\t\t\t// h+=Maj(a,b,c) from the past\n\tldp\tw13,w14,[c0,#8]\n\tadd\tw3,w3,w11\t\t\t// 
accumulate\n\tadd\tw4,w4,w12\n\tldp\tw11,w12,[c0,#16]\n\tadd\tw5,w5,w13\n\tadd\tw6,w6,w14\n\tldp\tw13,w14,[c0,#24]\n\tadd\tw7,w7,w11\n\tadd\tw8,w8,w12\n\tldr\tw12,[csp,#0]\n\tstp\tw3,w4,[c0,#0]\n\tadd\tw9,w9,w13\n\tmov\tw13,wzr\n\tstp\tw5,w6,[c0,#8]\n\tadd\tw10,w10,w14\n\tstp\tw7,w8,[c0,#16]\n\teor\tw14,w4,w5\n\tstp\tw9,w10,[c0,#24]\n\tmov\tw15,wzr\n\tmov\tc17,csp\n\tb.ne\t.L_00_48\n\n\tldr\tc29,[c29]\n\tadd\tcsp,csp,#16*4+2*__SIZEOF_POINTER__\n\tret\n.size\tblst_sha256_block_data_order,.-blst_sha256_block_data_order\n.globl\tblst_sha256_emit\n.hidden\tblst_sha256_emit\n.type\tblst_sha256_emit,%function\n.align\t4\nblst_sha256_emit:\n\thint\t#34\n\tldp\tx4,x5,[c1]\n\tldp\tx6,x7,[c1,#16]\n#ifndef\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n#endif\n\tstr\tw4,[c0,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[c0,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[c0,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[c0,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[c0,#0]\n\tstr\tw5,[c0,#8]\n\tstr\tw6,[c0,#16]\n\tstr\tw7,[c0,#24]\n\tret\n.size\tblst_sha256_emit,.-blst_sha256_emit\n\n.globl\tblst_sha256_bcopy\n.hidden\tblst_sha256_bcopy\n.type\tblst_sha256_bcopy,%function\n.align\t4\nblst_sha256_bcopy:\n\thint\t#34\n.Loop_bcopy:\n\tldrb\tw3,[c1],#1\n\tsub\tx2,x2,#1\n\tstrb\tw3,[c0],#1\n\tcbnz\tx2,.Loop_bcopy\n\tret\n.size\tblst_sha256_bcopy,.-blst_sha256_bcopy\n\n.globl\tblst_sha256_hcopy\n.hidden\tblst_sha256_hcopy\n.type\tblst_sha256_hcopy,%function\n.align\t4\nblst_sha256_hcopy:\n\thint\t#34\n\tldp\tx4,x5,[c1]\n\tldp\tx6,x7,[c1,#16]\n\tstp\tx4,x5,[c0]\n\tstp\tx6,x7,[c0,#16]\n\tret\n.size\tblst_sha256_hcopy,.-blst_sha256_hcopy\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/coff/add_mod_256-armv8.S",
    "content": ".text\n\n.globl\tadd_mod_256\n\n.def\tadd_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nadd_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tadds\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tadcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tadcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\tmul_by_3_mod_256\n\n.def\tmul_by_3_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_3_mod_256:\n\thint\t#34\n\tldp\tx12,x13,[x1]\n\tldp\tx14,x15,[x1,#16]\n\n\tadds\tx8,x12,x12\n\tldp\tx4,x5,[x2]\n\tadcs\tx9,x13,x13\n\tldp\tx6,x7,[x2,#16]\n\tadcs\tx10,x14,x14\n\tadcs\tx11,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\tadds\tx8,x8,x12\n\tadcs\tx9,x9,x13\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\tlshift_mod_256\n\n.def\tlshift_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nlshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n.Loop_lshift_mod_256:\n\tadds\tx8,x8,x8\n\tsub\tx2,x2,#1\n\tadcs\tx9,x9,x9\n\tadcs\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx12,x8,x4\n\tsbcs\tx13,x9,x5\n\tsbcs\tx14,x10,x6\n\tsbcs\tx15,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x12,lo\n\tcsel\tx9,x9,x13,lo\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\n\tcbnz\tx2
,.Loop_lshift_mod_256\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\trshift_mod_256\n\n.def\trshift_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nrshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n.Loop_rshift:\n\tadds\tx12,x8,x4\n\tsub\tx2,x2,#1\n\tadcs\tx13,x9,x5\n\tadcs\tx14,x10,x6\n\tadcs\tx15,x11,x7\n\tadc\tx3,xzr,xzr\n\ttst\tx8,#1\n\n\tcsel\tx12,x12,x8,ne\n\tcsel\tx13,x13,x9,ne\n\tcsel\tx14,x14,x10,ne\n\tcsel\tx15,x15,x11,ne\n\tcsel\tx3,x3,xzr,ne\n\n\textr\tx8,x13,x12,#1\n\textr\tx9,x14,x13,#1\n\textr\tx10,x15,x14,#1\n\textr\tx11,x3,x15,#1\n\n\tcbnz\tx2,.Loop_rshift\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\tcneg_mod_256\n\n.def\tcneg_mod_256;\n.type\t32;\n.endef\n.p2align\t5\ncneg_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx4,x5,[x3]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx12,x4,x8\n\tldp\tx6,x7,[x3,#16]\n\torr\tx4,x8,x9\n\tsbcs\tx13,x5,x9\n\torr\tx5,x10,x11\n\tsbcs\tx14,x6,x10\n\torr\tx3,x4,x5\n\tsbc\tx15,x7,x11\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx8,x8,x12,eq\n\tcsel\tx9,x9,x13,eq\n\tcsel\tx10,x10,x14,eq\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x15,eq\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\tsub_mod_256\n\n.def\tsub_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nsub_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tstp\tx8,x9,[x0]\n\tadc\tx11,x11,x7\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\tcheck_mod_256\n\n.def\tcheck_mod_256;\n.type\t32;\n.endef\n.p2align\t5\ncheck_mod_256:\n\tldp\tx8,x9,[x0]\n\tldp\tx10,x11,[x0,#16]\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\t
rev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tsubs\txzr,x8,x4\n\tsbcs\txzr,x9,x5\n\torr\tx8,x8,x9\n\tsbcs\txzr,x10,x6\n\torr\tx8,x8,x10\n\tsbcs\txzr,x11,x7\n\torr\tx8,x8,x11\n\tsbc\tx1,xzr,xzr\n\n\tcmp\tx8,#0\n\tmov\tx0,#1\n\tcsel\tx0,x0,xzr,ne\n\tand\tx0,x0,x1\n\n\tret\n\n\n.globl\tadd_n_check_mod_256\n\n.def\tadd_n_check_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nadd_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tadds\tx8,x8,x12\n\tldp\tx4,x5,[x3]\n\tadcs\tx9,x9,x13\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n\n\n.globl\tsub_n_check_mod_256\n\n.def\tsub_n_check_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nsub_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tsubs\tx8,x8,x12\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tadc\tx11,x11,x7\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, 
x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n\n"
  },
  {
    "path": "build/coff/add_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_256\n\n.def\tadd_mod_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nadd_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_add_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_add_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loaded_a_add_mod_256:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_add_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_add_mod_256:\n\n\n.globl\tmul_by_3_mod_256\n\n.def\tmul_by_3_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_by_3_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_3_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n.LSEH_body_mul_by_3_mod_256:\n\n\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rsi,%rdx\n\tmovq\t24(%rsi),%r11\n\n\tcall\t__lshift_mod_256\n\tmovq\t0(%rsp),%r12\n\n\tjmp\t.Loaded_a_add_mod_256\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_3_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_3_mod_256:\n\n.def\t__lshift_mod_256;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__lshift_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tmovq\t%r8,%rax\n\tadcq\t%r10,%r10\n\tmovq\t%r9,%rsi\n\tadcq\t%r11,%r11\n\tsbbq\t%r12,%r12\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tcmovcq\t%rbx,%r10\n\tcmovcq\t%rbp,%r11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n\n.globl\tlshift_mod_256\n\n.def\tlshift_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nlshift_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_lshift_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n.LSEH_body_lshift_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loop_lshift_mod_256:\n\tcall\t__lshift_mod_256\n\tdecl\t%edx\n\tjnz\t.Loop_lshift_mod_256\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_lshift_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_lshift_mod_256:\n\n\n.globl\trshift_mod_256\n\n.def\trshift_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nrshift_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_rshift_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_rshift_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rbp\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loop_rshift_mod_256:\n\tmovq\t%rbp,%r8\n\tandq\t$1,%rbp\n\tmovq\t0(%rcx),%rax\n\tnegq\t%rbp\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\n\tandq\t%rbp,%rax\n\tandq\t%rbp,%rsi\n\tandq\t%rbp,%rbx\n\tandq\t24(%rcx),%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tadcq\t%rbx,%r10\n\tadcq\t%rbp,%r11\n\tsbbq\t%rax,%rax\n\n\tshrq\t$1,%r8\n\tmovq\t%r9,%rbp\n\tshrq\t$1,%r9\n\tmovq\t%r10,%rbx\n\tshrq\t$1,%r10\n\tmovq\t%r11,%rsi\n\tshrq\t$1,%r11\n\n\tshlq\t$63,%rbp\n\tshlq\t$63,%rbx\n\torq\t%r8,%rbp\n\tshlq\t$63,%rsi\n\torq\t%rbx,%r9\n\tshlq\t$63,%rax\n\torq\t%rsi,%r10\n\torq\t%rax,%r11\n\n\tdecl\t%edx\n\tjnz\t.Loop_rshift_mod_256\n\n\tmovq\t%rbp,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_rshift_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_rshift_mod_256:\n\n\n.globl\tcneg_mod_256\n\n.def\tcneg_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\ncneg_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_cneg_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n.LSEH_body_cneg_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r12\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%r12,%r8\n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%r12\n\torq\t%r10,%r12\n\torq\t%r11,%r12\n\tmovq\t$-1,%rbp\n\n\tmovq\t0(%rcx),%rax\n\tcmovnzq\t%rbp,%r12\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\tandq\t%r12,%rax\n\tmovq\t24(%rcx),%rbp\n\tandq\t%r12,%rsi\n\tandq\t%r12,%rbx\n\tandq\t%r12,%rbp\n\n\tsubq\t%r8,%rax\n\tsbbq\t%r9,%rsi\n\tsbbq\t%r10,%rbx\n\tsbbq\t%r11,%rbp\n\n\torq\t%rdx,%rdx\n\n\tcmovzq\t%r8,%rax\n\tcmovzq\t%r9,%rsi\n\tmovq\t%rax,0(%rdi)\n\tcmovzq\t%r10,%rbx\n\tmovq\t%rsi,8(%rdi)\n\tcmovzq\t%r11,%rbp\n\tmovq\t%rbx,16(%rdi)\n\tmovq\t%rbp,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_cneg_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_cneg_mod_256:\n\n\n.globl\tsub_mod_256\n\n.def\tsub_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsub_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sub_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sub_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_sub_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sub_mod_256:\n\n\n.globl\tcheck_mod_256\n\n.def\tcheck_mod_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\ncheck_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_check_mod_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%rax\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\n\tmovq\t%rax,%r8\n\torq\t%r9,%rax\n\torq\t%r10,%rax\n\torq\t%r11,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t$1,%rdx\n\tcmpq\t$0,%rax\n\tcmovneq\t%rdx,%rax\n\tandq\t%rsi,%rax\n.LSEH_epilogue_check_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_check_mod_256:\n\n\n.globl\tadd_n_check_mod_256\n\n.def\tadd_n_check_mod_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nadd_n_check_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_add_n_check_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_add_n_check_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp
),%rsp\n\n.LSEH_epilogue_add_n_check_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_add_n_check_mod_256:\n\n\n.globl\tsub_n_check_mod_256\n\n.def\tsub_n_check_mod_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsub_n_check_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sub_n_check_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sub_n_check_mod_256:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_sub_n_check_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sub_n_check_mod_256:\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_add_mod_256\n.rva\t.LSEH_body_add_mod_256\n.rva\t.LSEH_info_add_mod_256_prologue\n\n.rva\t.LSEH_body_add_mod_256\n.rva\t.LSEH_epilogue_add_mod_256\n.rva\t.LSEH_info_add_mod_256_body\n\n.rva\t.LSEH_epilogue_add_mod_256\n.rva\t.LSEH_end_add_mod_256\n.rva\t.LSEH_info_add_mod_256_epilogue\n\n.rva\t.LSEH_begin_mul_
by_3_mod_256\n.rva\t.LSEH_body_mul_by_3_mod_256\n.rva\t.LSEH_info_mul_by_3_mod_256_prologue\n\n.rva\t.LSEH_body_mul_by_3_mod_256\n.rva\t.LSEH_epilogue_mul_by_3_mod_256\n.rva\t.LSEH_info_mul_by_3_mod_256_body\n\n.rva\t.LSEH_epilogue_mul_by_3_mod_256\n.rva\t.LSEH_end_mul_by_3_mod_256\n.rva\t.LSEH_info_mul_by_3_mod_256_epilogue\n\n.rva\t.LSEH_begin_lshift_mod_256\n.rva\t.LSEH_body_lshift_mod_256\n.rva\t.LSEH_info_lshift_mod_256_prologue\n\n.rva\t.LSEH_body_lshift_mod_256\n.rva\t.LSEH_epilogue_lshift_mod_256\n.rva\t.LSEH_info_lshift_mod_256_body\n\n.rva\t.LSEH_epilogue_lshift_mod_256\n.rva\t.LSEH_end_lshift_mod_256\n.rva\t.LSEH_info_lshift_mod_256_epilogue\n\n.rva\t.LSEH_begin_rshift_mod_256\n.rva\t.LSEH_body_rshift_mod_256\n.rva\t.LSEH_info_rshift_mod_256_prologue\n\n.rva\t.LSEH_body_rshift_mod_256\n.rva\t.LSEH_epilogue_rshift_mod_256\n.rva\t.LSEH_info_rshift_mod_256_body\n\n.rva\t.LSEH_epilogue_rshift_mod_256\n.rva\t.LSEH_end_rshift_mod_256\n.rva\t.LSEH_info_rshift_mod_256_epilogue\n\n.rva\t.LSEH_begin_cneg_mod_256\n.rva\t.LSEH_body_cneg_mod_256\n.rva\t.LSEH_info_cneg_mod_256_prologue\n\n.rva\t.LSEH_body_cneg_mod_256\n.rva\t.LSEH_epilogue_cneg_mod_256\n.rva\t.LSEH_info_cneg_mod_256_body\n\n.rva\t.LSEH_epilogue_cneg_mod_256\n.rva\t.LSEH_end_cneg_mod_256\n.rva\t.LSEH_info_cneg_mod_256_epilogue\n\n.rva\t.LSEH_begin_sub_mod_256\n.rva\t.LSEH_body_sub_mod_256\n.rva\t.LSEH_info_sub_mod_256_prologue\n\n.rva\t.LSEH_body_sub_mod_256\n.rva\t.LSEH_epilogue_sub_mod_256\n.rva\t.LSEH_info_sub_mod_256_body\n\n.rva\t.LSEH_epilogue_sub_mod_256\n.rva\t.LSEH_end_sub_mod_256\n.rva\t.LSEH_info_sub_mod_256_epilogue\n\n.rva\t.LSEH_epilogue_check_mod_256\n.rva\t.LSEH_end_check_mod_256\n.rva\t.LSEH_info_check_mod_256_epilogue\n\n.rva\t.LSEH_begin_add_n_check_mod_256\n.rva\t.LSEH_body_add_n_check_mod_256\n.rva\t.LSEH_info_add_n_check_mod_256_prologue\n\n.rva\t.LSEH_body_add_n_check_mod_256\n.rva\t.LSEH_epilogue_add_n_check_mod_256\n.rva\t.LSEH_info_add_n_check_mod_256_body\n\n.rva\t.LSEH_epilog
ue_add_n_check_mod_256\n.rva\t.LSEH_end_add_n_check_mod_256\n.rva\t.LSEH_info_add_n_check_mod_256_epilogue\n\n.rva\t.LSEH_begin_sub_n_check_mod_256\n.rva\t.LSEH_body_sub_n_check_mod_256\n.rva\t.LSEH_info_sub_n_check_mod_256_prologue\n\n.rva\t.LSEH_body_sub_n_check_mod_256\n.rva\t.LSEH_epilogue_sub_n_check_mod_256\n.rva\t.LSEH_info_sub_n_check_mod_256_body\n\n.rva\t.LSEH_epilogue_sub_n_check_mod_256\n.rva\t.LSEH_end_sub_n_check_mod_256\n.rva\t.LSEH_info_sub_n_check_mod_256_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_add_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_add_mod_256_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_add_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_3_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_3_mod_256_body:\n.byte\t1,0,11,0\n.byte\t0x00,0xc4,0x00,0x00\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_3_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_lshift_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_lshift_mod_256_body:\n.byte\t1,0,11,0\n.byte\t0x00,0xc4,0x00,0x00\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.LSEH_info_lshift_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74
,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_rshift_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_rshift_mod_256_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_rshift_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_cneg_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_cneg_mod_256_body:\n.byte\t1,0,11,0\n.byte\t0x00,0xc4,0x00,0x00\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.LSEH_info_cneg_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sub_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sub_mod_256_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sub_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_check_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_add_n_check_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_add_n_check_mod_256_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t
0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_add_n_check_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sub_n_check_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sub_n_check_mod_256_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sub_n_check_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/add_mod_384-armv8.S",
    "content": ".text\n\n.globl\tadd_mod_384\n\n.def\tadd_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nadd_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__add_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__add_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n__add_mod_384_ab_are_loaded:\n\tadds\tx10,x10,x16\n\tadcs\tx11,x11,x17\n\tadcs\tx12,x12,x19\n\tadcs\tx13,x13,x20\n\tadcs\tx14,x14,x21\n\tadcs\tx15,x15,x22\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n\n\n.globl\tadd_mod_384x\n\n.def\tadd_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nadd_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]
\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\trshift_mod_384\n\n.def\trshift_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nrshift_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n.Loop_rshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__rshift_mod_384\n\tcbnz\tx2,.Loop_rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__rshift_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__rshift_mod_384:\n\tsbfx\tx22,x10,#0,#1\n\tand\tx16,x22,x4\n\tand\tx17,x22,x5\n\tadds\tx10,x10,x16\n\tand\tx19,x22,x6\n\tadcs\tx11,x11,x17\n\tand\tx20,x22,x7\n\tadcs\tx12,x12,x19\n\tand\tx21,x22,x8\n\tadcs\tx13,x13,x20\n\tand\tx22,x22,x9\n\tadcs\tx14,x14,x21\n\textr\tx10,x11,x10,#1\t// a[0:5] >>= 
1\n\tadcs\tx15,x15,x22\n\textr\tx11,x12,x11,#1\n\tadc\tx22,xzr,xzr\n\textr\tx12,x13,x12,#1\n\textr\tx13,x14,x13,#1\n\textr\tx14,x15,x14,#1\n\textr\tx15,x22,x15,#1\n\tret\n\n\n.globl\tdiv_by_2_mod_384\n\n.def\tdiv_by_2_mod_384;\n.type\t32;\n.endef\n.p2align\t5\ndiv_by_2_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tlshift_mod_384\n\n.def\tlshift_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nlshift_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n.Loop_lshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__lshift_mod_384\n\tcbnz\tx2,.Loop_lshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__lshift_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__lshift_mod_384:\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n
\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n\n\n.globl\tmul_by_3_mod_384\n\n.def\tmul_by_3_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_3_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tmul_by_8_mod_384\n\n.def\tmul_by_8_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_8_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tmul_by_3_mod_384x\n\n.def\tmul_by_3_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_3_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n
\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1,#48]\n\tldp\tx19,x20,[x1,#64]\n\tldp\tx21,x22,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tmul_by_8_mod_384x\n\n.def\tmul_by_8_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_8_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tcneg_mod_384\n\n.def\tcneg_mod_384;\n.type\t32;\n.endef\n.p2align\t5\ncneg_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\t
x21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx4,x5,[x3]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx6,x7,[x3,#16]\n\n\tsubs\tx16,x4,x10\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx8,x9,[x3,#32]\n\torr\tx3,x10,x11\n\tsbcs\tx17,x5,x11\n\torr\tx3,x3,x12\n\tsbcs\tx19,x6,x12\n\torr\tx3,x3,x13\n\tsbcs\tx20,x7,x13\n\torr\tx3,x3,x14\n\tsbcs\tx21,x8,x14\n\torr\tx3,x3,x15\n\tsbc\tx22,x9,x15\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx10,x10,x16,eq\n\tcsel\tx11,x11,x17,eq\n\tcsel\tx12,x12,x19,eq\n\tcsel\tx13,x13,x20,eq\n\tstp\tx10,x11,[x0]\n\tcsel\tx14,x14,x21,eq\n\tstp\tx12,x13,[x0,#16]\n\tcsel\tx15,x15,x22,eq\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsub_mod_384\n\n.def\tsub_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nsub_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__sub_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__sub_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n\tsubs\tx10,x10,x16\n\tsbcs\tx11,x11,x17\n\tsbcs\tx12,x12,x19\n\tsbcs\tx13,x13,x20\n\tsbcs\tx14,x14,x21\n\tsbcs\tx15,x15,x22\n\tsbc\tx3,xzr,xzr\n\n\tand\tx16,x4,x3\n\tand\tx17,x5,x3\n\tadds\tx10,x10,x16\n\tand\tx19,x6,x3\n\tadcs\tx11,x11,x17\n\tand\tx20,x7,x3\n\tadcs\tx12,x12,x19\n\tand\tx21,x8,x3\n\tadcs\tx13,x13,x20\n\tand\tx22,x9,x3\n\tadcs\tx14,x14,x21\n\tadc\tx15,x15
,x22\n\n\tret\n\n\n.globl\tsub_mod_384x\n\n.def\tsub_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nsub_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tmul_by_1_plus_i_mod_384x\n\n.def\tmul_by_1_plus_i_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nmul_by_1_plus_i_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\tadd\tx2,x1,#48\n\n\tbl\t__sub_mod_384\t\t\t// a->re - a->im\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\t// a->re + 
a->im\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsgn0_pty_mod_384\n\n.def\tsgn0_pty_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nsgn0_pty_mod_384:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx0,x10,#1\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx3,x3,xzr\n\n\tmvn\tx3,x3\n\tand\tx3,x3,#2\n\torr\tx0,x0,x3\n\n\tret\n\n\n.globl\tsgn0_pty_mod_384x\n\n.def\tsgn0_pty_mod_384x;\n.type\t32;\n.endef\n.p2align\t5\nsgn0_pty_mod_384x:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx2,x10,#1\n\torr\tx3,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx3,x3,x12\n\tadcs\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tldp\tx10,x11,[x0,#48]\n\tldp\tx12,x13,[x0,#64]\n\tldp\tx14,x15,[x0,#80]\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx2,x2,x16\n\n\tand\tx0,x10,#1\n\torr\tx1,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx1,x1,x12\n\tadcs\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x
12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx0,x0,x16\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t// pack sign and parity\n\n\tret\n\n.globl\tvec_select_32\n\n.def\tvec_select_32;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_32:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d}, [x1]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [x2]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [x0]\n\tret\n\n.globl\tvec_select_48\n\n.def\tvec_select_48;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_48:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\n.globl\tvec_select_96\n\n.def\tvec_select_96;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_96:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\tvec_select_192\n\n.def\tvec_select_192;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_192:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, 
v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\tvec_select_144\n\n.def\tvec_select_144;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_144:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\n.globl\tvec_select_288\n\n.def\tvec_select_288;\n.type\t32;\n.endef\n.p2align\t5\nvec_select_288:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, 
v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\tvec_prefetch\n\n.def\tvec_prefetch;\n.type\t32;\n.endef\n.p2align\t5\nvec_prefetch:\n\thint\t#34\n\tadd\tx1, x1, x0\n\tsub\tx1, x1, #1\n\tmov\tx2, #64\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tprfm\tpldl1keep, [x0]\n\tret\n\n.globl\tvec_is_zero_16x\n\n.def\tvec_is_zero_16x;\n.type\t32;\n.endef\n.p2align\t5\nvec_is_zero_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tlsr\tx1, x1, 
#4\n\tsub\tx1, x1, #1\n\tcbz\tx1, .Loop_is_zero_done\n\n.Loop_is_zero:\n\tld1\t{v1.2d}, [x0], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\tx1, x1, #1\n\tcbnz\tx1, .Loop_is_zero\n\n.Loop_is_zero_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n\n.globl\tvec_is_equal_16x\n\n.def\tvec_is_equal_16x;\n.type\t32;\n.endef\n.p2align\t5\nvec_is_equal_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tld1\t{v1.2d}, [x1], #16\n\tlsr\tx2, x2, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\n.Loop_is_equal:\n\tsub\tx2, x2, #1\n\tcbz\tx2, .Loop_is_equal_done\n\tld1\t{v1.2d}, [x0], #16\n\tld1\t{v2.2d}, [x1], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\t.Loop_is_equal\n\tnop\n\n.Loop_is_equal_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n\n"
  },
  {
    "path": "build/coff/add_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_384\n\n.def\tadd_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nadd_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_add_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_add_mod_384:\n\n\n\tcall\t__add_mod_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_add_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_add_mod_384:\n\n.def\t__add_mod_384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__add_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__add_mod_384_a_is_loaded:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3
\n#endif\n\n\n.globl\tadd_mod_384x\n\n.def\tadd_mod_384x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nadd_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_add_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$24,%rsp\n\n.LSEH_body_add_mod_384x:\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n\n\tmovq\t24+8(%rsp),%r14\n\n\tmovq\t24+16(%rsp),%r13\n\n\tmovq\t24+24(%rsp),%r12\n\n\tmovq\t24+32(%rsp),%rbx\n\n\tmovq\t24+40(%rsp),%rbp\n\n\tleaq\t24+48(%rsp),%rsp\n\n.LSEH_epilogue_add_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_add_mod_384x:\n\n\n.globl\trshift_mod_384\n\n.def\trshift_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nrshift_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_rshift_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_rshift_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n.Loop_rshift_mod_384:\n\tcall\t__rshift_mod_384\n\tdecl\t%edx\n\tjnz\t.Loop_rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_rshift_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_rshift_mod_384:\n\n.def\t__rshift_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__rshift_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rsi\n\tmovq\t0(%rcx),%r14\n\tandq\t%r8,%rsi\n\tmovq\t8(%rcx),%r15\n\tnegq\t%rsi\n\tmovq\t16(%rcx),%rax\n\tandq\t%rsi,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rsi,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%rbx\n\tadcq\t%r12,%rbp\n\tadcq\t%r13,%rsi\n\tsbbq\t%r13,%r13\n\n\tshrq\t$1,%r14\n\tmovq\t%r15,%r8\n\tshrq\t$1,%r15\n\tmovq\t%rax,%r9\n\tshrq\t$1,%rax\n\tmovq\t%rbx,%r10\n\tshrq\t$1,%rbx\n\tmovq\t%rbp,%r11\n\tshrq\t$1,%rbp\n\tmovq\t%rsi,%r12\n\tshrq\t$1,%rsi\n\tshlq\t$63,%r8\n\tshlq\t$63,%r9\n\torq\t%r14,%r8\n\tshlq\t$63,%r10\n\torq\t%r15,%r9\n\tshlq\t$63,%r11\n\torq\t%rax,%r10\n\tshlq\t$63,%r12\n\torq\t%rbx,%r11\n\tshlq\t$63,%r13\n\torq\t%rbp,%r12\n\torq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r14\n\tlfence\n\tjmpq\t*%r14\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tdiv_by_2_mod_384\n\n.def\tdiv_by_2_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\ndiv_by_2_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_div_by_2_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_div_by_2_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rdx,%rcx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tcall\t__rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_div_by_2_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_div_by_2_mod_384:\n\n\n.globl\tlshift_mod_384\n\n.def\tlshift_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nlshift_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_lshift_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_lshift_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n.Loop_lshift_mod_384:\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdi,%rdi\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdi\n\n\tmovq\t(%rsp),%rdi\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\tdecl\t%edx\n\tjnz\t.Loop_lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_lshift_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_lshift_mod_384:\n\n.def\t__lshift_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__lshift_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n\n.globl\tmul_by_3_mod_384\n\n.def\tmul_by_3_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmul_by_3_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_3_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rsi\n\n.LSEH_body_mul_by_3_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_3_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_3_mod_384:\n\n.globl\tmul_by_8_mod_384\n\n.def\tmul_by_8_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_by_8_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_8_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_mul_by_8_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_8_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_8_mod_384:\n\n\n.globl\tmul_by_3_mod_384x\n\n.def\tmul_by_3_mod_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_by_3_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_3_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rsi\n\n.LSEH_body_mul_by_3_mod_384x:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rdi),%rdi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%r8\n\tmovq\t56(%rsi),%r9\n\tmovq\t64(%rsi),%r10\n\tmovq\t72(%rsi),%r11\n\tmovq\t80(%rsi),%r12\n\tmovq\t88(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t$48,%rdx\n\taddq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_3_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_3_mod_384x:\n\n.globl\tmul_by_8_mod_384x\n\n.def\tmul_by_8_mod_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_by_8_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_8_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rsi\n\n.LSEH_body_mul_by_8_mod_384x:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rsi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,48+0(%rdi)\n\tmovq\t%r9,48+8(%rdi)\n\tmovq\t%r10,48+16(%rdi)\n\tmovq\t%r11,48+24(%rdi)\n\tmovq\t%r12,48+32(%rdi)\n\tmovq\t%r13,48+40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_8_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_8_mod_384x:\n\n\n.globl\tcneg_mod_384\n\n.def\tcneg_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\ncneg_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_cneg_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdx\n\n.LSEH_body_cneg_mod_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rdx,%r8\n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%rdx\n\tmovq\t32(%rsi),%r12\n\torq\t%r10,%rdx\n\tmovq\t40(%rsi),%r13\n\torq\t%r11,%rdx\n\tmovq\t$-1,%rsi\n\torq\t%r12,%rdx\n\torq\t%r13,%rdx\n\n\tmovq\t0(%rcx),%r14\n\tcmovnzq\t%rsi,%rdx\n\tmovq\t8(%rcx),%r15\n\tmovq\t16(%rcx),%rax\n\tandq\t%rdx,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rdx,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rdx,%rax\n\tmovq\t40(%rcx),%rsi\n\tandq\t%rdx,%rbx\n\tmovq\t0(%rsp),%rcx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\tsubq\t%r8,%r14\n\tsbbq\t%r9,%r15\n\tsbbq\t%r10,%rax\n\tsbbq\t%r11,%rbx\n\tsbbq\t%r12,%rbp\n\tsbbq\t%r13,%rsi\n\n\torq\t%rcx,%rcx\n\n\tcmovzq\t%r8,%r14\n\tcmovzq\t%r9,%r15\n\tcmovzq\t%r10,%rax\n\tmovq\t%r14,0(%rdi)\n\tcmovzq\t%r11,%rbx\n\tmovq\t%r15,8(%rdi)\n\tcmovzq\t%r12,%rbp\n\tmovq\t%rax,16(%rdi)\n\tcmovzq\t%r13,%rsi\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rsi,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_cneg_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_cneg_mod_384:\n\n\n.globl\tsub_mod_384\n\n.def\tsub_mod_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsub_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sub_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sub_mod_384:\n\n\n\tcall\t__sub_mod_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sub_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sub_mod_384:\n\n.def\t__sub_mod_384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__sub_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsub_mod_384x\n\n.def\tsub_mod_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsub_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sub_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$24,%rsp\n\n.LSEH_body_sub_mod_384x:\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n\n\tmovq\t24+8(%rsp),%r14\n\n\tmovq\t24+16(%rsp),%r13\n\n\tmovq\t24+24(%rsp),%r12\n\n\tmovq\t24+32(%rsp),%rbx\n\n\tmovq\t24+40(%rsp),%rbp\n\n\tleaq\t24+48(%rsp),%rsp\n\n.LSEH_epilogue_sub_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sub_mod_384x:\n.globl\tmul_by_1_plus_i_mod_384x\n\n.def\tmul_by_1_plus_i_mod_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_by_1_plus_i_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_by_1_plus_i_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$56,%rsp\n\n.LSEH_body_mul_by_1_plus_i_mod_384x:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rbx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rcx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\tmovq\t%rdi,48(%rsp)\n\tsbbq\t%rdi,%rdi\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rbx\n\tsbbq\t80(%rsi),%rcx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t0(%rdx),%r8\n\tmovq\t%r9,8(%rsp)\n\tmovq\t8(%rdx),%r9\n\tmovq\t%r10,16(%rsp)\n\tmovq\t16(%rdx),%r10\n\tmovq\t%r11,24(%rsp)\n\tmovq\t24(%rdx),%r11\n\tmovq\t%r12,32(%rsp)\n\tandq\t%rsi,%r8\n\tmovq\t32(%rdx),%r12\n\tmovq\t%r13,40(%rsp)\n\tandq\t%rsi,%r9\n\tmovq\t40(%rdx),%r13\n\tandq\t%rsi,%r10\n\tandq\t%rsi,%r11\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tmovq\t48(%rsp),%rsi\n\n\taddq\t%r8,%r14\n\tmovq\t0(%rsp),%r8\n\tadcq\t%r9,%r15\n\tmovq\t8(%rsp),%r9\n\tadcq\t%r10,%rax\n\tmovq\t16(%rsp),%r10\n\tadcq\t%r11,%rbx\n\tmovq\t24(%rsp),%r11\n\tadcq\t%r12,%rcx\n\tmovq\t32(%rsp),%r12\n\tadcq\t%r13,%rbp\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t%r14,0(%rsi)\n\tmovq\t%r8,%r14\n\tmovq\t%r15,8(%rsi)\n\tmovq\t%rax,16(%rsi)\n\tmovq\t%r9,%r15\n\tmovq\t%rbx,24(%rsi)\n\tmovq\t%rcx,32(%rsi)\n\tmovq\t%r10,%rax\n\tmovq\t%rbp,40(%rsi)\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t%r11,%rbx\n\tsbbq\t8(%rdx),%r9\n\tsbbq\t16(%rdx),%r10\n\tmovq\t%r12,%rcx\n\tsbb
q\t24(%rdx),%r11\n\tsbbq\t32(%rdx),%r12\n\tmovq\t%r13,%rbp\n\tsbbq\t40(%rdx),%r13\n\tsbbq\t$0,%rdi\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,48(%rsi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,56(%rsi)\n\tcmovcq\t%rcx,%r12\n\tmovq\t%r10,64(%rsi)\n\tcmovcq\t%rbp,%r13\n\tmovq\t%r11,72(%rsi)\n\tmovq\t%r12,80(%rsi)\n\tmovq\t%r13,88(%rsi)\n\n\tmovq\t56+0(%rsp),%r15\n\n\tmovq\t56+8(%rsp),%r14\n\n\tmovq\t56+16(%rsp),%r13\n\n\tmovq\t56+24(%rsp),%r12\n\n\tmovq\t56+32(%rsp),%rbx\n\n\tmovq\t56+40(%rsp),%rbp\n\n\tleaq\t56+48(%rsp),%rsp\n\n.LSEH_epilogue_mul_by_1_plus_i_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_by_1_plus_i_mod_384x:\n.globl\tsgn0_pty_mod_384\n\n.def\tsgn0_pty_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsgn0_pty_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0_pty_mod_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n.LSEH_body_sgn0_pty_mod_384:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%rcx\n\tmovq\t40(%rdi),%rdx\n\n\txorq\t%rax,%rax\n\tmovq\t%r8,%rdi\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%rdi\n\tandq\t$2,%rax\n\torq\t%rdi,%rax\n\n.LSEH_epilogue_sgn0_pty_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0_pty_mod_384:\n\n.globl\tsgn0_pty_mod_384x\n\n.def\tsgn0_pty_mod_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsgn0_pty_mod_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0_pty_mod_384x:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tpushq\t%rbx\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sgn0_pty_mod_384x:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rdi),%r8\n\tmovq\t56(%rdi),%r9\n\tmovq\t64(%rdi),%r10\n\tmovq\t72(%rdi),%r11\n\tmovq\t80(%rdi),%rcx\n\tmovq\t88(%rdi),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\tleaq\t0(%rdi),%rax\n\txorq\t%rdi,%rdi\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r8,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%rbp\n\tandq\t$2,%rdi\n\torq\t%rbp,%rdi\n\n\tmovq\t0(%rax),%r8\n\tmovq\t8(%rax),%r9\n\tmovq\t16(%rax),%r10\n\tmovq\t24(%rax),%r11\n\tmovq\t32(%rax),%rcx\n\tmovq\t40(%rax),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\txorq\t%rax,%rax\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%rbx\n\n\tnotq\t%rax\n\n\ttestq\t%r8,%r8\n\tcmovzq\t%rdi,%rbp\n\n\ttestq\t%rbx,%rbx\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%rbp\n\tandq\t$2,%rax\n\torq\t%rbp,%rax\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_sgn0_pty_mod_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tu
d2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0_pty_mod_384x:\n.globl\tvec_select_32\n\n.def\tvec_select_32;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nvec_select_32:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t16(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t16(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t16(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-16(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-16(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-16(%rcx)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-16(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_select_48\n\n.def\tvec_select_48;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nvec_select_48:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t24(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t24(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t24(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-24(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-24(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-24(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-24(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-24(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-24(%rcx)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-24(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_select_96\n\n.def\tvec_select_96;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nvec_select_96:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t48(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t48(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t48(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-48(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-48(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-48(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-48(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-48(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-48(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-48(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-48(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-48(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-48(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-48(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-48(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-48(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-48(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-48(%rcx)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-48(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_select_192\n\n.def\tvec_select_192;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nvec_select_192:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t96(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t96(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t96(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-96(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-96(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-96(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-96(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-96(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-96(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-96(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-96(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-96(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-96(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-96(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-96(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-96(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-96(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-96(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-96(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-96(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-96(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-96(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-96(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-96(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-96(%r8),%
xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-96(%rcx)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-96(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_select_144\n\n.def\tvec_select_144;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nvec_select_144:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t72(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t72(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t72(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-72(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-72(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-72(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-72(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-72(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-72(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-72(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-72(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-72(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-72(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-72(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-72(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-72(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-72(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-72(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-72(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-72(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-72(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-72(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-72(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-72(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-72(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-72(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-72(%rcx)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-72(
%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_select_288\n\n.def\tvec_select_288;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nvec_select_288:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%r9d,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdx),%xmm0\n\tleaq\t144(%rdx),%rdx\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%r8),%xmm1\n\tleaq\t144(%r8),%r8\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t144(%rcx),%rcx\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-1
44(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t176+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t176+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t192+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t192+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,192-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t208+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t208+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,208-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t224+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t224+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,224-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t240+16-144(%rdx),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t240+16-144(%r8),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,240-144(%rcx)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t256+16-144(%rdx),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t256+16-144(%r8),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,256-144(%rcx)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,272-144(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_prefetch\n\n.def\tvec_prefetch;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nvec_prefetch:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tleaq\t-1(%rcx,%rdx,1),%rdx\n\tmovq\t$64,%rax\n\txorq\t%r8,%r8\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rcx)\n\tleaq\t(%rcx,%rax,1),%rcx\n\tcmpq\t%rdx,%rcx\n\tcmovaq\t%rdx,%rcx\n\tprefetchnta\t(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_is_zero_16x\n\n.def\tvec_is_zero_16x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nvec_is_zero_16x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%edx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rcx),%xmm0\n\tleaq\t16(%rcx),%rcx\n\n.Loop_is_zero:\n\tdecl\t%edx\n\tjz\t.Loop_is_zero_done\n\tmovdqu\t(%rcx),%xmm1\n\tleaq\t16(%rcx),%rcx\n\tpor\t%xmm1,%xmm0\n\tjmp\t.Loop_is_zero\n\n.Loop_is_zero_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%edx\n\ttestq\t%rax,%rax\n\tcmovnzl\t%edx,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tvec_is_equal_16x\n\n.def\tvec_is_equal_16x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nvec_is_equal_16x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%r8d\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rcx),%xmm0\n\tmovdqu\t(%rdx),%xmm1\n\tsubq\t%rcx,%rdx\n\tleaq\t16(%rcx),%rcx\n\tpxor\t%xmm1,%xmm0\n\n.Loop_is_equal:\n\tdecl\t%r8d\n\tjz\t.Loop_is_equal_done\n\tmovdqu\t(%rcx),%xmm1\n\tmovdqu\t(%rcx,%rdx,1),%xmm2\n\tleaq\t16(%rcx),%rcx\n\tpxor\t%xmm2,%xmm1\n\tpor\t%xmm1,%xmm0\n\tjmp\t.Loop_is_equal\n\n.Loop_is_equal_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%r8d\n\ttestq\t%rax,%rax\n\tcmovnzl\t%r8d,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_add_mod_384\n.rva\t.LSEH_body_add_mod_384\n.rva\t.LSEH_info_add_mod_384_prologue\n\n.rva\t.LSEH_body_add_mod_384\n.rva\t.LSEH_epilogue_add_mod_384\n.rva\t.LSEH_info_add_mod_384_body\n\n.rva\t.LSEH_epilogue_add_mod_384\n.rva\t.LSEH_end_add_mod_384\n.rva\t.LSEH_info_add_mod_384_epilogue\n\n.rva\t.LSEH_begin_add_mod_384x\n.rva\t.LSEH_body_add_mod_384x\n.rva\t.LSEH_info_add_mod_384x_prologue\n\n.rva\t.LSEH_body_add_mod_384x\n.rva\t.LSEH_epilogue_add_mod_384x\n.rva\t.LSEH_info_add_mod_384x_body\n\n.rva\t.LSEH_epilogue_add_mod_384x\n.rva\t.LSEH_end_add_mod_384x\n.rva\t.LSEH_info_add_mod_384x_epilogue\n\n.rva\t.LSEH_begin_rshift_mod_384\n.rva\t.LSEH_body_rshift_mod_384\n.rva\t.LSEH_info_rshift_mod_384_prologue\n\n.rva\t.LSEH_body_rshift_mod_384\n.rva\t.LSEH_epilogue_rshift_mod_384\n.rva\t.LSEH_info_rshift_mod_384_body\n\n.rva\t.LSEH_epilogue_rshift_mod_384\n.rva\t.LSEH_end_rshift_mod_384\n.rva\t.LSEH_info_rshift_mod_384_epilogue\n\n.rva\t.LSEH_begin_div_by_2_mod_384\n.rva\t.LSEH_body_div_by_2_mod_384\n.rva\t.LSEH_info_div_by_2_mod_384_prologue\n\n.rva\t.LSEH_body_div_by_2_mod_384\n.rva\t.LSEH_epilogue_div_by_2_mod_384\n.rva\t.LSEH_info_div_by_2_mod_384_body\n\n.rva\t.LSEH_epilogue_div_by_2_mod
_384\n.rva\t.LSEH_end_div_by_2_mod_384\n.rva\t.LSEH_info_div_by_2_mod_384_epilogue\n\n.rva\t.LSEH_begin_lshift_mod_384\n.rva\t.LSEH_body_lshift_mod_384\n.rva\t.LSEH_info_lshift_mod_384_prologue\n\n.rva\t.LSEH_body_lshift_mod_384\n.rva\t.LSEH_epilogue_lshift_mod_384\n.rva\t.LSEH_info_lshift_mod_384_body\n\n.rva\t.LSEH_epilogue_lshift_mod_384\n.rva\t.LSEH_end_lshift_mod_384\n.rva\t.LSEH_info_lshift_mod_384_epilogue\n\n.rva\t.LSEH_begin_mul_by_3_mod_384\n.rva\t.LSEH_body_mul_by_3_mod_384\n.rva\t.LSEH_info_mul_by_3_mod_384_prologue\n\n.rva\t.LSEH_body_mul_by_3_mod_384\n.rva\t.LSEH_epilogue_mul_by_3_mod_384\n.rva\t.LSEH_info_mul_by_3_mod_384_body\n\n.rva\t.LSEH_epilogue_mul_by_3_mod_384\n.rva\t.LSEH_end_mul_by_3_mod_384\n.rva\t.LSEH_info_mul_by_3_mod_384_epilogue\n\n.rva\t.LSEH_begin_mul_by_8_mod_384\n.rva\t.LSEH_body_mul_by_8_mod_384\n.rva\t.LSEH_info_mul_by_8_mod_384_prologue\n\n.rva\t.LSEH_body_mul_by_8_mod_384\n.rva\t.LSEH_epilogue_mul_by_8_mod_384\n.rva\t.LSEH_info_mul_by_8_mod_384_body\n\n.rva\t.LSEH_epilogue_mul_by_8_mod_384\n.rva\t.LSEH_end_mul_by_8_mod_384\n.rva\t.LSEH_info_mul_by_8_mod_384_epilogue\n\n.rva\t.LSEH_begin_mul_by_3_mod_384x\n.rva\t.LSEH_body_mul_by_3_mod_384x\n.rva\t.LSEH_info_mul_by_3_mod_384x_prologue\n\n.rva\t.LSEH_body_mul_by_3_mod_384x\n.rva\t.LSEH_epilogue_mul_by_3_mod_384x\n.rva\t.LSEH_info_mul_by_3_mod_384x_body\n\n.rva\t.LSEH_epilogue_mul_by_3_mod_384x\n.rva\t.LSEH_end_mul_by_3_mod_384x\n.rva\t.LSEH_info_mul_by_3_mod_384x_epilogue\n\n.rva\t.LSEH_begin_mul_by_8_mod_384x\n.rva\t.LSEH_body_mul_by_8_mod_384x\n.rva\t.LSEH_info_mul_by_8_mod_384x_prologue\n\n.rva\t.LSEH_body_mul_by_8_mod_384x\n.rva\t.LSEH_epilogue_mul_by_8_mod_384x\n.rva\t.LSEH_info_mul_by_8_mod_384x_body\n\n.rva\t.LSEH_epilogue_mul_by_8_mod_384x\n.rva\t.LSEH_end_mul_by_8_mod_384x\n.rva\t.LSEH_info_mul_by_8_mod_384x_epilogue\n\n.rva\t.LSEH_begin_cneg_mod_384\n.rva\t.LSEH_body_cneg_mod_384\n.rva\t.LSEH_info_cneg_mod_384_prologue\n\n.rva\t.LSEH_body_cneg_mod_384\n.rva\t.LSEH_epilog
ue_cneg_mod_384\n.rva\t.LSEH_info_cneg_mod_384_body\n\n.rva\t.LSEH_epilogue_cneg_mod_384\n.rva\t.LSEH_end_cneg_mod_384\n.rva\t.LSEH_info_cneg_mod_384_epilogue\n\n.rva\t.LSEH_begin_sub_mod_384\n.rva\t.LSEH_body_sub_mod_384\n.rva\t.LSEH_info_sub_mod_384_prologue\n\n.rva\t.LSEH_body_sub_mod_384\n.rva\t.LSEH_epilogue_sub_mod_384\n.rva\t.LSEH_info_sub_mod_384_body\n\n.rva\t.LSEH_epilogue_sub_mod_384\n.rva\t.LSEH_end_sub_mod_384\n.rva\t.LSEH_info_sub_mod_384_epilogue\n\n.rva\t.LSEH_begin_sub_mod_384x\n.rva\t.LSEH_body_sub_mod_384x\n.rva\t.LSEH_info_sub_mod_384x_prologue\n\n.rva\t.LSEH_body_sub_mod_384x\n.rva\t.LSEH_epilogue_sub_mod_384x\n.rva\t.LSEH_info_sub_mod_384x_body\n\n.rva\t.LSEH_epilogue_sub_mod_384x\n.rva\t.LSEH_end_sub_mod_384x\n.rva\t.LSEH_info_sub_mod_384x_epilogue\n\n.rva\t.LSEH_begin_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_body_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_info_mul_by_1_plus_i_mod_384x_prologue\n\n.rva\t.LSEH_body_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_epilogue_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_info_mul_by_1_plus_i_mod_384x_body\n\n.rva\t.LSEH_epilogue_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_end_mul_by_1_plus_i_mod_384x\n.rva\t.LSEH_info_mul_by_1_plus_i_mod_384x_epilogue\n\n.rva\t.LSEH_begin_sgn0_pty_mod_384\n.rva\t.LSEH_body_sgn0_pty_mod_384\n.rva\t.LSEH_info_sgn0_pty_mod_384_prologue\n\n.rva\t.LSEH_body_sgn0_pty_mod_384\n.rva\t.LSEH_epilogue_sgn0_pty_mod_384\n.rva\t.LSEH_info_sgn0_pty_mod_384_body\n\n.rva\t.LSEH_epilogue_sgn0_pty_mod_384\n.rva\t.LSEH_end_sgn0_pty_mod_384\n.rva\t.LSEH_info_sgn0_pty_mod_384_epilogue\n\n.rva\t.LSEH_begin_sgn0_pty_mod_384x\n.rva\t.LSEH_body_sgn0_pty_mod_384x\n.rva\t.LSEH_info_sgn0_pty_mod_384x_prologue\n\n.rva\t.LSEH_body_sgn0_pty_mod_384x\n.rva\t.LSEH_epilogue_sgn0_pty_mod_384x\n.rva\t.LSEH_info_sgn0_pty_mod_384x_body\n\n.rva\t.LSEH_epilogue_sgn0_pty_mod_384x\n.rva\t.LSEH_end_sgn0_pty_mod_384x\n.rva\t.LSEH_info_sgn0_pty_mod_384x_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_add_mod_384_prologue:\n.byte\t1,0,5,
0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_add_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_add_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_add_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_add_mod_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x03,0x00\n.byte\t0x00,0xe4,0x04,0x00\n.byte\t0x00,0xd4,0x05,0x00\n.byte\t0x00,0xc4,0x06,0x00\n.byte\t0x00,0x34,0x07,0x00\n.byte\t0x00,0x54,0x08,0x00\n.byte\t0x00,0x74,0x0a,0x00\n.byte\t0x00,0x64,0x0b,0x00\n.byte\t0x00,0x82\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_add_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_rshift_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_rshift_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_rshift_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_div_by_2_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_div_by_2_mod_384_body:\n.b
yte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_div_by_2_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_lshift_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_lshift_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_lshift_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_3_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_3_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_3_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_8_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_8_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\
n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_8_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_3_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_3_mod_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_3_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_8_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_8_mod_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_8_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_cneg_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_cneg_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x
00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_cneg_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sub_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sub_mod_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sub_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sub_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sub_mod_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x03,0x00\n.byte\t0x00,0xe4,0x04,0x00\n.byte\t0x00,0xd4,0x05,0x00\n.byte\t0x00,0xc4,0x06,0x00\n.byte\t0x00,0x34,0x07,0x00\n.byte\t0x00,0x54,0x08,0x00\n.byte\t0x00,0x74,0x0a,0x00\n.byte\t0x00,0x64,0x0b,0x00\n.byte\t0x00,0x82\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sub_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_by_1_plus_i_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_by_1_plus_i_mod_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x07,0x00\n.byte\t0x00,0xe4,0x08,0x00\n.byte\t0x00,0xd4,0x09,0x00\n.byte\t0x00,0xc4,0x0a,0x00\n.byte\t0x00,0x34,0x0b,0x00\n.byte\t0x00,0x54,0x0c,0x00\n.byte\t0x00,0x74,0x0e,0x00\n.byte\t0x00,0x64,0x0f,0x00\n.byte\t0x00,0xc2\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byt
e\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_by_1_plus_i_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0_pty_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0_pty_mod_384_body:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0_pty_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0_pty_mod_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0_pty_mod_384x_body:\n.byte\t1,0,9,0\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0_pty_mod_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/add_mod_384x384-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_384x384\n\n.def\tadd_mod_384x384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nadd_mod_384x384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_add_mod_384x384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_add_mod_384x384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\taddq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tadcq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tadcq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tadcq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tadcq\t48(%rdx),%r14\n\tmovq\t%r9,8(%rdi)\n\tadcq\t56(%rdx),%r15\n\tmovq\t%r10,16(%rdi)\n\tadcq\t64(%rdx),%rax\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r14,%r8\n\tadcq\t72(%rdx),%rbx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r15,%r9\n\tadcq\t80(%rdx),%rbp\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rax,%r10\n\tadcq\t88(%rdx),%rsi\n\tmovq\t%rbx,%r11\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rbp,%r12\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%rbx\n\tsbbq\t32(%rcx),%rbp\n\tmovq\t%rsi,%r13\n\tsbbq\t40(%rcx),%rsi\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r8,%r14\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%r11,%rbx\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%r12,%rbp\n\tmovq\t%rax,64(%rdi)\n\tcmovcq\t%r13,%rsi\n\tmovq\t%rbx,72(%rdi)\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_add_mod_384x384:\n\tmov\t8(%rsp),%rdi
\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_add_mod_384x384:\n\n.globl\tsub_mod_384x384\n\n.def\tsub_mod_384x384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsub_mod_384x384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sub_mod_384x384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sub_mod_384x384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(
%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sub_mod_384x384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sub_mod_384x384:\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_add_mod_384x384\n.rva\t.LSEH_body_add_mod_384x384\n.rva\t.LSEH_info_add_mod_384x384_prologue\n\n.rva\t.LSEH_body_add_mod_384x384\n.rva\t.LSEH_epilogue_add_mod_384x384\n.rva\t.LSEH_info_add_mod_384x384_body\n\n.rva\t.LSEH_epilogue_add_mod_384x384\n.rva\t.LSEH_end_add_mod_384x384\n.rva\t.LSEH_info_add_mod_384x384_epilogue\n\n.rva\t.LSEH_begin_sub_mod_384x384\n.rva\t.LSEH_body_sub_mod_384x384\n.rva\t.LSEH_info_sub_mod_384x384_prologue\n\n.rva\t.LSEH_body_sub_mod_384x384\n.rva\t.LSEH_epilogue_sub_mod_384x384\n.rva\t.LSEH_info_sub_mod_384x384_body\n\n.rva\t.LSEH_epilogue_sub_mod_384x384\n.rva\t.LSEH_end_sub_mod_384x384\n.rva\t.LSEH_info_sub_mod_384x384_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_add_mod_384x384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_add_mod_384x384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_add_mod_384x384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sub_mod_384x384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sub_mod_384x384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\
t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sub_mod_384x384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/ct_inverse_mod_256-armv8.S",
    "content": ".text\n\n.globl\tct_inverse_mod_256\n\n.def\tct_inverse_mod_256;\n.type\t32;\n.endef\n.p2align\t5\nct_inverse_mod_256:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-10*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1040\n\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx6, x7, [x1,#8*2]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#16+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #16+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstr\tx0, [sp]\t\t// offload out_ptr\n\n\tldp\tx8, x9, [x2,#8*0]\n\tldp\tx10, x11, [x2,#8*2]\n\n\tstp\tx4, x5, [x1,#8*0]\t// copy input to |a|\n\tstp\tx6, x7, [x1,#8*2]\n\tstp\tx8, x9, [x1,#8*4]\t// copy modulus to |b|\n\tstp\tx10, x11, [x1,#8*6]\n\n\t////////////////////////////////////////// first iteration\n\tbl\t.Lab_approximation_31_256_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12,[x0,#8*8]\t\t// initialize |u| with |f0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12, [x0,#8*10]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\tx8, 
[x1,#8*8]\t\t// |u|\n\tldr\tx9, [x1,#8*14]\t// |v|\n\tmadd\tx4, x16, x8, xzr\t// |u|*|f0|\n\tmadd\tx4, x17, x9, x4\t// |v|*|g0|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*4]\n\tstp\tx5, x5, [x0,#8*6]\n\n\tmadd\tx4, x12, x8, xzr\t// |u|*|f1|\n\tmadd\tx4, x13, x9, x4\t// |v|*|g1|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*10]\n\tstp\tx5, x5, [x0,#8*12]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstr\tx24, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\t\t// sign extension\n\tstp\tx24, x24, [x0,#8*4]\n\tstp\tx24, x24, [x0,#8*6]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected 
|g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// 
|g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\t////////////////////////////////////////// two[!] 
last iterations\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #47\t\t\t// 31 + 512 % 31\n\t//bl\t__ab_approximation_62_256\t// |a| and |b| are exact,\n\tldr\tx7, [x1,#8*0]\t\t// just load\n\tldr\tx11, [x1,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\tx16, x14\n\tmov\tx17, x15\n\tldr\tx0, [sp]\t\t\t// original out_ptr\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx20, x7, x17\t\t// figure out top-most limb\n\tldp\tx8, x9, [x3,#8*0]\n\tadc\tx23, x23, x25\n\tldp\tx10, x11, [x3,#8*2]\n\n\tadd\tx20, x20, x23\t\t// x20 is 1, 0 or -1\n\tasr\tx19, x20, #63\t\t// sign as mask\n\n\tand\tx23,   x8, x19\t\t// add mod<<256 conditionally\n\tand\tx24,   x9, x19\n\tadds\tx4, x4, x23\n\tand\tx25,   x10, x19\n\tadcs\tx5, x5, x24\n\tand\tx26,   x11, x19\n\tadcs\tx6, x6, x25\n\tadcs\tx7, x22,   x26\n\tadc\tx20, x20, xzr\t\t// x20 is 1, 0 or -1\n\n\tneg\tx19, x20\n\torr\tx20, x20, x19\t\t// excess bit or sign as mask\n\tasr\tx19, x19, #63\t\t// excess bit as mask\n\n\tand\tx8, x8, x20\t\t// mask |mod|\n\tand\tx9, x9, x20\n\tand\tx10, x10, x20\n\tand\tx11, x11, x20\n\n\teor\tx8, x8, x19\t\t// conditionally negate |mod|\n\teor\tx9, x9, x19\n\tadds\tx8, x8, x19, lsr#63\n\teor\tx10, x10, x19\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x19\n\tadcs\tx10, x10, xzr\n\tadc\tx11, x11, xzr\n\n\tadds\tx4, x4, x8\t// final adjustment for |mod|<<256\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*4]\n\tadc\tx7, x7, x11\n\tstp\tx6, x7, [x0,#8*6]\n\n\tadd\tsp, sp, #1040\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldr\tx29, 
[sp],#10*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n////////////////////////////////////////////////////////////////////////\n.def\t__smul_256x63;\n.type\t32;\n.endef\n.p2align\t5\n__smul_256x63:\n\tldp\tx4, x5, [x1,#8*0+64]\t// load |u| (or |v|)\n\tasr\tx14, x16, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx6, x7, [x1,#8*2+64]\n\teor\tx16, x16, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx22, [x1,#8*4+64]\n\n\teor\tx4, x4, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx16, x16, x14\n\teor\tx5, x5, x14\n\tadds\tx4, x4, x14, lsr#63\n\teor\tx6, x6, x14\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\teor\tx22, x22, x14\n\tumulh\tx19, x4, x16\n\tadcs\tx7, x7, xzr\n\tumulh\tx20, x5, x16\n\tadcs\tx22, x22, xzr\n\tumulh\tx21, x6, x16\n\tmul\tx4, x4, x16\n\tcmp\tx16, #0\n\tmul\tx5, x5, x16\n\tcsel\tx22, x22, xzr, ne\n\tmul\tx6, x6, x16\n\tadds\tx5, x5, x19\n\tmul\tx24, x7, x16\n\tadcs\tx6, x6, x20\n\tadcs\tx24, x24, x21\n\tadc\tx26, xzr, xzr\n\tldp\tx8, x9, [x1,#8*0+112]\t// load |u| (or |v|)\n\tasr\tx14, x17, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx10, x11, [x1,#8*2+112]\n\teor\tx17, x17, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx23, [x1,#8*4+112]\n\n\teor\tx8, x8, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx17, x17, x14\n\teor\tx9, x9, x14\n\tadds\tx8, x8, x14, lsr#63\n\teor\tx10, x10, x14\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x14\n\tadcs\tx10, x10, xzr\n\teor\tx23, x23, x14\n\tumulh\tx19, x8, x17\n\tadcs\tx11, x11, xzr\n\tumulh\tx20, x9, x17\n\tadcs\tx23, x23, xzr\n\tumulh\tx21, x10, x17\n\tadc\tx15, xzr, xzr\t\t// used in __smul_512x63_tail\n\tmul\tx8, x8, x17\n\tcmp\tx17, #0\n\tmul\tx9, x9, x17\n\tcsel\tx23, x23, xzr, ne\n\tmul\tx10, x10, x17\n\tadds\tx9, x9, x19\n\tmul\tx25, x11, x17\n\tadcs\tx10, x10, x20\n\tadcs\tx25, x25, x21\n\tadc\tx26, x26, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*0]\n\tadcs\tx24,   x24,   x25\n\tstp\tx6, x24, 
[x0,#8*2]\n\n\tret\n\n\n.def\t__smul_512x63_tail;\n.type\t32;\n.endef\n.p2align\t5\n__smul_512x63_tail:\n\tumulh\tx24, x7, x16\n\tldr\tx5, [x1,#8*19]\t// load rest of |v|\n\tadc\tx26, x26, xzr\n\tldp\tx6, x7, [x1,#8*20]\n\tand\tx22, x22, x16\n\n\tumulh\tx11, x11, x17\t// resume |v|*|g1| chain\n\n\tsub\tx24, x24, x22\t// tie up |u|*|f1| chain\n\tasr\tx25, x24, #63\n\n\teor\tx5, x5, x14\t// conditionally negate rest of |v|\n\teor\tx6, x6, x14\n\tadds\tx5, x5, x15\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\tumulh\tx19, x23,   x17\n\tadc\tx7, x7, xzr\n\tumulh\tx20, x5, x17\n\tadd\tx11, x11, x26\n\tumulh\tx21, x6, x17\n\n\tmul\tx4, x23,   x17\n\tmul\tx5, x5, x17\n\tadds\tx4, x4, x11\n\tmul\tx6, x6, x17\n\tadcs\tx5, x5, x19\n\tmul\tx22,   x7, x17\n\tadcs\tx6, x6, x20\n\tadcs\tx22,   x22,   x21\n\tadc\tx23, xzr, xzr\t\t// used in the final step\n\n\tadds\tx4, x4, x24\n\tadcs\tx5, x5, x25\n\tadcs\tx6, x6, x25\n\tstp\tx4, x5, [x0,#8*4]\n\tadcs\tx22,   x22,   x25\t// carry is used in the final step\n\tstp\tx6, x22,   [x0,#8*6]\n\n\tret\n\n\n.def\t__smul_256_n_shift_by_31;\n.type\t32;\n.endef\n.p2align\t5\n__smul_256_n_shift_by_31:\n\tldp\tx4, x5, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx24, x12, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx6, x7, [x1,#8*2+0]\n\teor\tx25, x12, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx4, x4, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx5, x5, x24\n\tadds\tx4, x4, x24, lsr#63\n\teor\tx6, x6, x24\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x24\n\tumulh\tx19, x4, x25\n\tadcs\tx6, x6, xzr\n\tumulh\tx20, x5, x25\n\tadc\tx7, x7, xzr\n\tumulh\tx21, x6, x25\n\tand\tx24, x24, x25\n\tumulh\tx22, x7, x25\n\tneg\tx24, x24\n\n\tmul\tx4, x4, x25\n\tmul\tx5, x5, x25\n\tmul\tx6, x6, x25\n\tadds\tx5, x5, x19\n\tmul\tx7, x7, x25\n\tadcs\tx6, x6, x20\n\tadcs\tx7, x7, x21\n\tadc\tx22, x22, x24\n\tldp\tx8, x9, [x1,#8*0+32]\t// load |a| (or |b|)\n\tasr\tx24, x13, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx10, x11, 
[x1,#8*2+32]\n\teor\tx25, x13, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx8, x8, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx9, x9, x24\n\tadds\tx8, x8, x24, lsr#63\n\teor\tx10, x10, x24\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x24\n\tumulh\tx19, x8, x25\n\tadcs\tx10, x10, xzr\n\tumulh\tx20, x9, x25\n\tadc\tx11, x11, xzr\n\tumulh\tx21, x10, x25\n\tand\tx24, x24, x25\n\tumulh\tx23, x11, x25\n\tneg\tx24, x24\n\n\tmul\tx8, x8, x25\n\tmul\tx9, x9, x25\n\tmul\tx10, x10, x25\n\tadds\tx9, x9, x19\n\tmul\tx11, x11, x25\n\tadcs\tx10, x10, x20\n\tadcs\tx11, x11, x21\n\tadc\tx23, x23, x24\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tadcs\tx7, x7, x11\n\tadc\tx8, x22,   x23\n\n\textr\tx4, x5, x4, #31\n\textr\tx5, x6, x5, #31\n\textr\tx6, x7, x6, #31\n\tasr\tx23, x8, #63\t// result's sign as mask\n\textr\tx7, x8, x7, #31\n\n\teor\tx4, x4, x23\t// ensure the result is positive\n\teor\tx5, x5, x23\n\tadds\tx4, x4, x23, lsr#63\n\teor\tx6, x6, x23\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x23\n\tadcs\tx6, x6, xzr\n\tstp\tx4, x5, [x0,#8*0]\n\tadc\tx7, x7, xzr\n\tstp\tx6, x7, [x0,#8*2]\n\n\teor\tx12, x12, x23\t\t// adjust |f/g| accordingly\n\teor\tx13, x13, x23\n\tsub\tx12, x12, x23\n\tsub\tx13, x13, x23\n\n\tret\n\n.def\t__ab_approximation_31_256;\n.type\t32;\n.endef\n.p2align\t4\n__ab_approximation_31_256:\n\tldp\tx6, x7, [x1,#8*2]\n\tldp\tx10, x11, [x1,#8*6]\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx8, x9, [x1,#8*4]\n\n.Lab_approximation_31_256_loaded:\n\torr\tx19, x7, x11\t// check top-most limbs, ...\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x5, ne\n\torr\tx19, x7, x11\t// and ones before top-most, ...\n\tcsel\tx10, x10, x9, ne\n\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x4, ne\n\torr\tx19, x7, x11\t// and one more, ...\n\tcsel\tx10, x10, x8, ne\n\n\tclz\tx19, x19\n\tcmp\tx19, #64\n\tcsel\tx19, x19, xzr, ne\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, 
x10, ne\n\tneg\tx20, x19\n\n\tlslv\tx7, x7, x19\t// align high limbs to the left\n\tlslv\tx11, x11, x19\n\tlsrv\tx6, x6, x20\n\tlsrv\tx10, x10, x20\n\tand\tx6, x6, x20, asr#6\n\tand\tx10, x10, x20, asr#6\n\torr\tx7, x7, x6\n\torr\tx11, x11, x10\n\n\tbfxil\tx7, x4, #0, #31\n\tbfxil\tx11, x8, #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n\n\n.def\t__inner_loop_31_256;\n.type\t32;\n.endef\n.p2align\t4\n__inner_loop_31_256:\n\tmov\tx2, #31\n\tmov\tx13, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx15, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx23,#0x7FFFFFFF7FFFFFFF\n\n.Loop_31_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x15\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx15, x15, x13,    hs\t// exchange |fg0| and |fg1|\n\tcsel\tx13, x13, x19,   hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x15, x22\n\tand\tx20, x23, x22\n\tsub\tx13, x13, x19\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx15, x15, x15\t// |f1|<<=1\n\tadd\tx13, x13, x20\n\tsub\tx15, x15, x23\n\tcbnz\tx2, .Loop_31_256\n\n\tmov\tx23, #0x7FFFFFFF\n\tubfx\tx12, x13, #0, #32\n\tubfx\tx13, x13, #32, #32\n\tubfx\tx14, x15, #0, #32\n\tubfx\tx15, x15, #32, #32\n\tsub\tx12, x12, x23\t\t// remove bias\n\tsub\tx13, x13, x23\n\tsub\tx14, x14, x23\n\tsub\tx15, x15, x23\n\n\tret\n\n\n.def\t__inner_loop_62_256;\n.type\t32;\n.endef\n.p2align\t4\n__inner_loop_62_256:\n\tmov\tx12, #1\t\t// |f0|=1\n\tmov\tx13, #0\t\t// |g0|=0\n\tmov\tx14, #0\t\t// |f1|=0\n\tmov\tx15, #1\t\t// |g1|=1\n\n.Loop_62_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x12\n\tcsel\tx11, x11, x7, hs\t// |b_| = 
|a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tmov\tx20, x13\n\tcsel\tx12, x12, x14,       hs\t// exchange |f0| and |f1|\n\tcsel\tx14, x14, x19,     hs\n\tcsel\tx13, x13, x15,       hs\t// exchange |g0| and |g1|\n\tcsel\tx15, x15, x20,     hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x14, x22\n\tand\tx20, x15, x22\n\tadd\tx14, x14, x14\t\t// |f1|<<=1\n\tadd\tx15, x15, x15\t\t// |g1|<<=1\n\tsub\tx12, x12, x19\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx13, x13, x20\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62_256\n\n\tret\n\n"
  },
  {
    "path": "build/coff/ct_inverse_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\tct_inverse_mod_256\n\n.def\tct_inverse_mod_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nct_inverse_mod_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_ct_inverse_mod_256:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$1072,%rsp\n\n.LSEH_body_ct_inverse_mod_256:\n\n\n\tleaq\t48+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tmovq\t0(%rdx),%r12\n\tmovq\t8(%rdx),%r13\n\tmovq\t16(%rdx),%r14\n\tmovq\t24(%rdx),%r15\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rax,%rsi\n\n\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,64(%rdi)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,72(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\n\tmovq\t64(%rsi),%r8\n\tmovq\t104(%rsi),%r12\n\tmovq\t%r8,%r9\n\timulq\t0(%rsp),%r8\n\tmovq\t%r12,%r13\n\timulq\t8(%rsp),%r12\n\taddq\t%r12,%r8\n\tmovq\t%r8,32(%rdi)\n\tsarq\t$63,%r8\n\tmovq\t%r8,40(%rdi)\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r8,56(%rdi)\n\tmovq\t%r8,6
4(%rdi)\n\tleaq\t64(%rsi),%rsi\n\n\timulq\t%rdx,%r9\n\timulq\t%rcx,%r13\n\taddq\t%r13,%r9\n\tmovq\t%r9,72(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tmovq\t%r9,104(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\
t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256
,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\tsarq\t$63,%rbp\n\tmovq\t%rbp,40(%rdi)\n\tmovq\t%rbp,48(%rdi)\n\tmovq\t%rbp,56(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%
rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%r
sp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\n\txorq\t$256+64,%rsi\n\tmovl\t$47,%edx\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t32(%rsi),%r10\n\n\tcall\t__inner_loop_62_256\n\n\n\n\n\n\n\n\tleaq\t64(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_512x63\n\tadcq\t%rbp,%rdx\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rdx\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%rax\n\n\tmovq\t%rax,%rdx\n\tnegq\t%rax\n\torq\t%rax,%rdx\n\tsarq\t$63,%rax\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rcx,%rcx\n\txorq\t%rax,%r9\n\tsubq\t%rax,%rcx\n\txorq\t%rax,%r10\n\txorq\t%rax,%rdx\n\taddq\t%rcx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\tleaq\t1072(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_ct_inverse_mod_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_ct_inverse_mod_256:\n.def\t__smulq_512x63;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__smulq_512x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t40(%rsi),%r8\n\tmovq\t48(%rsi),%r9\n\tmovq\t56(%rsi),%r10\n\tmovq\t64(%rsi),%r11\n\tmovq\t72(%rsi),%r12\n\tmovq\t80(%rsi),%r13\n\tmovq\t88(%rsi),%r14\n\tmovq\t96(%rsi),%r15\n\n\tmovq\t%rcx,%rdx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rcx\n\taddq\t%rax,%rcx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\n\tmulq\t%rcx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rcx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rcx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rcx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rcx\n\tad
dq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\timulq\t%rcx\n\taddq\t%rax,%r15\n\tadcq\t$0,%rdx\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t%rbx,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t%rbp,%r14\n\tadcq\t%rbp,%r15\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__smulq_256x63;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulq_256x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%rcx,%rdx\n\tmovq\t40+0(%rsi),%r12\n\tmovq\t40+8(%rsi),%r13\n\tmovq\t40+16(%rsi),%r14\n\tmovq\t40+24(%rsi),%r15\n\tmovq\t40+32(%rsi),%rcx\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rcx\n\taddq\t%r12,%rax\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rcx\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tmovq\t%
rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulq_256_n_shift_by_31;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulq_256_n_shift_by_31:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rbp\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t32+0(%rsi),%r12\n\tmovq\t32+8(%rsi),%r13\n\tmovq\t32+16(%rsi),%r14\n\tmovq\t32+24(%rsi),%r15\n\n\tmovq\t%rcx,%rbx\n\tsarq\t$63,%rcx\n\txorq\t%rax,%rax\n\tsubq\t%rcx,%rax\n\n\txorq\t%rcx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rcx,%r12\n\txorq\t%rcx,%r13\n\txorq\t%rcx,%r14\n\txorq\t%rcx,%r15\n\taddq\t%r12,%rax\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%r14
,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t0(%rdi),%rdx\n\tmovq\t8(%rdi),%rcx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%rbp,%r11\n\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\txorq\t%rbp,%rdx\n\txorq\t%rbp,%rcx\n\taddq\t%rax,%rdx\n\taddq\t%rax,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__ab_approximation_31_256;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__ab_approximation_31_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t24(%rsi),%r9\n\tmovq\t56(%rsi),%r11\n\tmovq\t16(%rsi),%rbx\n\tmovq\t48(%rsi),%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t40(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t32(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tnotq\t%rax\n\tandq\t%rax,%r9\n\tandq\t%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__inner_loop_31_256;\t.scl 
3;\t.type 32;\t.endef\n.p2align\t5\n__inner_loop_31_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\n.Loop_31_256:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edx\n\tjnz\t.Loop_31_256\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__inner_loop_62_256;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__inner_loop_62_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t%edx,%r15d\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\n.Loop_62_256:\n\txorq\t%rax,%rax\n\ttestq\t%r14,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t%r14,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%r15d\n\tjnz\t.Loop_62_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_ct_inverse_mod_256\n.rva\t.LSEH_body_ct_inverse_mod_256\n.rva\t.LSEH_info_ct_inverse_mod_256_prologue\n\n.rva\t.LSEH_bo
dy_ct_inverse_mod_256\n.rva\t.LSEH_epilogue_ct_inverse_mod_256\n.rva\t.LSEH_info_ct_inverse_mod_256_body\n\n.rva\t.LSEH_epilogue_ct_inverse_mod_256\n.rva\t.LSEH_end_ct_inverse_mod_256\n.rva\t.LSEH_info_ct_inverse_mod_256_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_ct_inverse_mod_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_ct_inverse_mod_256_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x86,0x00\n.byte\t0x00,0xe4,0x87,0x00\n.byte\t0x00,0xd4,0x88,0x00\n.byte\t0x00,0xc4,0x89,0x00\n.byte\t0x00,0x34,0x8a,0x00\n.byte\t0x00,0x54,0x8b,0x00\n.byte\t0x00,0x74,0x8d,0x00\n.byte\t0x00,0x64,0x8e,0x00\n.byte\t0x00,0x01,0x8c,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_ct_inverse_mod_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/ct_inverse_mod_384-armv8.S",
    "content": ".text\n\n.globl\tct_inverse_mod_384\n\n.def\tct_inverse_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nct_inverse_mod_384:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1056\n\n\tldp\tx22,   x4, [x1,#8*0]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx7, x8, [x1,#8*4]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#32+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #32+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstp\tx0, x3, [sp]\t\t// offload out_ptr, nx_ptr\n\n\tldp\tx9, x10, [x2,#8*0]\n\tldp\tx11, x12, [x2,#8*2]\n\tldp\tx13, x14, [x2,#8*4]\n\n\tstp\tx22,   x4, [x1,#8*0]\t// copy input to |a|\n\tstp\tx5, x6, [x1,#8*2]\n\tstp\tx7, x8, [x1,#8*4]\n\tstp\tx9, x10, [x1,#8*6]\t// copy modulus to |b|\n\tstp\tx11, x12, [x1,#8*8]\n\tstp\tx13, x14, [x1,#8*10]\n\n\t////////////////////////////////////////// first iteration\n\tmov\tx2, #62\n\tbl\t.Lab_approximation_62_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15,[x0,#8*12]\t\t// initialize |u| with |f0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15, [x0,#8*14]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, 
x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\tx7, [x1,#8*12]\t// |u|\n\tldr\tx8, [x1,#8*20]\t// |v|\n\tmul\tx3, x20, x7\t\t// |u|*|f0|\n\tsmulh\tx4, x20, x7\n\tmul\tx5, x21, x8\t\t// |v|*|g0|\n\tsmulh\tx6, x21, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*6]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*8]\n\tstp\tx5, x5, [x0,#8*10]\n\n\tmul\tx3, x15, x7\t\t// |u|*|f1|\n\tsmulh\tx4, x15, x7\n\tmul\tx5, x16, x8\t\t// |v|*|g1|\n\tsmulh\tx6, x16, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*14]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*16]\n\tstp\tx5, x5, [x0,#8*18]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// 
|g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstr\tx27, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\t\t// sign extension\n\tstp\tx27, x27, [x0,#8*6]\n\tstp\tx27, x27, [x0,#8*8]\n\tstp\tx27, x27, [x0,#8*10]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected 
|g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\t////////////////////////////////////////// iteration before last\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldp\tx3, x8, [x1,#8*0]\t// just load\n\tldp\tx9, x14, [x1,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tstr\tx3, [x0,#8*0]\n\tstr\tx9, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f0|\n\tmov\tx21, x16\t\t\t// exact |g0|\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*12\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f1|\n\tmov\tx21, x16\t\t\t// exact |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\t////////////////////////////////////////// last iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #24\t\t\t// 768 % 62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldr\tx3, [x1,#8*0]\t\t// just load\n\teor\tx8, x8, x8\n\tldr\tx9, [x1,#8*6]\n\teor\tx14, x14, x14\n\tbl\t__inner_loop_62\n\n\tmov\tx20, x17\n\tmov\tx21, x19\n\tldp\tx0, x15, [sp]\t\t\t// original out_ptr and n_ptr\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx23, x8, x21\t\t// figure out top-most limb\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x15,#8*0]\t// load |mod|\n\tadd\tx23, x23, x26\t\t// x23 is 1, 0 or -1\n\tldp\tx11, x12, [x15,#8*2]\n\tasr\tx22, x23, #63\t\t// sign as mask\n\tldp\tx13, x14, [x15,#8*4]\n\n\tand\tx26,   x9, x22\t\t// add mod<<384 conditionally\n\tand\tx27,   x10, x22\n\tadds\tx3, x3, x26\n\tand\tx28,   x11, x22\n\tadcs\tx4, x4, x27\n\tand\tx2,   x12, x22\n\tadcs\tx5, x5, x28\n\tand\tx26,   x13, x22\n\tadcs\tx6, x6, x2\n\tand\tx27,   x14, x22\n\tadcs\tx7, x7, x26\n\tadcs\tx8, x25,   x27\n\tadc\tx23, x23, xzr\t\t// x23 is 1, 0 or -1\n\n\tneg\tx22, x23\n\torr\tx23, x23, x22\t\t// excess bit or sign as mask\n\tasr\tx22, x22, #63\t\t// excess bit as mask\n\n\tand\tx9, x9, x23\t\t// mask |mod|\n\tand\tx10, x10, x23\n\tand\tx11, x11, x23\n\tand\tx12, x12, x23\n\tand\tx13, x13, x23\n\tand\tx14, x14, x23\n\n\teor\tx9,  x9, x22\t// conditionally negate |mod|\n\teor\tx10,  x10, 
x22\n\tadds\tx9,  x9, x22, lsr#63\n\teor\tx11,  x11, x22\n\tadcs\tx10,  x10, xzr\n\teor\tx12,  x12, x22\n\tadcs\tx11,  x11, xzr\n\teor\tx13, x13, x22\n\tadcs\tx12,  x12, xzr\n\teor\tx14, x14, x22\n\tadcs\tx13, x13, xzr\n\tadc\tx14, x14, xzr\n\n\tadds\tx3, x3, x9\t// final adjustment for |mod|<<384\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*8]\n\tadc\tx8, x8, x14\n\tstp\tx7, x8, [x0,#8*10]\n\n\tadd\tsp, sp, #1056\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n////////////////////////////////////////////////////////////////////////\n// see corresponding commentary in ctx_inverse_mod_384-x86_64...\n.def\t__smul_384x63;\n.type\t32;\n.endef\n.p2align\t5\n__smul_384x63:\n\tldp\tx3, x4, [x1,#8*0+96]\t// load |u| (or |v|)\n\tasr\tx17, x20, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx5, x6, [x1,#8*2+96]\n\teor\tx20, x20, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx7, x8, [x1,#8*4+96]\n\n\teor\tx3, x3, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx25, [x1,#8*6+96]\n\tsub\tx20, x20, x17\n\teor\tx4, x4, x17\n\tadds\tx3, x3, x17, lsr#63\n\teor\tx5, x5, x17\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x17\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x3, x20\n\teor\tx8, x8, x17\n\tumulh\tx23, x4, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x5, x20\n\teor\tx25, x25, x17\n\tmul\tx3, x3, x20\n\tadcs\tx8, x8, xzr\n\tmul\tx4, x4, x20\n\tadcs\tx25, x25, xzr\n\tcmp\tx20, #0\n\tmul\tx5, x5, x20\n\tcsel\tx25, x25, xzr, ne\n\tadds\tx4, x4, x22\n\tumulh\tx22, x6, x20\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x7, x20\n\tmul\tx6, x6, x20\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x24\n\tmul\tx27,x8, x20\n\tadcs\tx7, x7, 
x22\n\tadcs\tx27,x27,x23\n\tadc\tx2, xzr, xzr\n\tldp\tx9, x10, [x1,#8*0+160]\t// load |u| (or |v|)\n\tasr\tx17, x21, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx11, x12, [x1,#8*2+160]\n\teor\tx21, x21, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx13, x14, [x1,#8*4+160]\n\n\teor\tx9, x9, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx26, [x1,#8*6+160]\n\tsub\tx21, x21, x17\n\teor\tx10, x10, x17\n\tadds\tx9, x9, x17, lsr#63\n\teor\tx11, x11, x17\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x17\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x17\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x9, x21\n\teor\tx14, x14, x17\n\tumulh\tx23, x10, x21\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x11, x21\n\teor\tx26, x26, x17\n\tmul\tx9, x9, x21\n\tadcs\tx14, x14, xzr\n\tmul\tx10, x10, x21\n\tadcs\tx26, x26, xzr\n\tadc\tx19, xzr, xzr\t\t// used in __smul_768x63_tail\n\tcmp\tx21, #0\n\tmul\tx11, x11, x21\n\tcsel\tx26, x26, xzr, ne\n\tadds\tx10, x10, x22\n\tumulh\tx22, x12, x21\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x13, x21\n\tmul\tx12, x12, x21\n\tmul\tx13, x13, x21\n\tadcs\tx12, x12, x24\n\tmul\tx28,x14, x21\n\tadcs\tx13, x13, x22\n\tadcs\tx28,x28,x23\n\tadc\tx2, x2, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*2]\n\tadcs\tx27,   x27,   x28\n\tstp\tx7, x27,   [x0,#8*4]\n\n\tret\n\n\n.def\t__smul_768x63_tail;\n.type\t32;\n.endef\n.p2align\t5\n__smul_768x63_tail:\n\tumulh\tx27, x8, x20\n\tldr\tx4, [x1,#8*27]// load rest of |v|\n\tadc\tx2, x2, xzr\n\tldp\tx5, x6, [x1,#8*28]\n\tand\tx25, x25, x20\n\tldp\tx7, x8, [x1,#8*30]\n\tsub\tx27, x27, x25\t// tie up |u|*|f1| chain\n\n\tumulh\tx14, x14, x21\t// resume |v|*|g1| chain\n\teor\tx4, x4, x17\t// conditionally negate rest of |v|\n\teor\tx5, x5, x17\n\teor\tx6, x6, x17\n\tadds\tx4, x4, x19\n\teor\tx7, x7, x17\n\tadcs\tx5, x5, xzr\n\teor\tx8, x8, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x26,   x21\n\tadcs\tx7, x7, 
xzr\n\tumulh\tx23, x4, x21\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x21\n\tadd\tx14, x14, x2\n\tumulh\tx25, x6, x21\n\tasr\tx28, x27, #63\n\tumulh\tx2, x7, x21\n\tmul\tx3, x26,   x21\n\tmul\tx4, x4, x21\n\tmul\tx5, x5, x21\n\tadds\tx3, x3, x14\n\tmul\tx6, x6, x21\n\tadcs\tx4, x4, x22\n\tmul\tx7, x7, x21\n\tadcs\tx5, x5, x23\n\tmul\tx22,   x8, x21\n\tadcs\tx6, x6, x24\n\tadcs\tx7, x7, x25\n\tadcs\tx25,   x22, x2\n\tadc\tx26, xzr, xzr\t\t// used in the final step\n\n\tadds\tx3, x3, x27\n\tadcs\tx4, x4, x28\n\tadcs\tx5, x5, x28\n\tadcs\tx6, x6, x28\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x28\n\tstp\tx5, x6, [x0,#8*8]\n\tadcs\tx25,   x25,   x28\t// carry is used in the final step\n\tstp\tx7, x25,   [x0,#8*10]\n\n\tret\n\n\n.def\t__smul_384_n_shift_by_62;\n.type\t32;\n.endef\n.p2align\t5\n__smul_384_n_shift_by_62:\n\tldp\tx3, x4, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx28, x15, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx5, x6, [x1,#8*2+0]\n\teor\tx2, x15, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx7, x8, [x1,#8*4+0]\n\n\teor\tx3, x3, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tumulh\tx22, x3, x2\n\tadcs\tx6, x6, xzr\n\tumulh\tx23, x4, x2\n\teor\tx8, x8, x28\n\tmul\tx3, x3, x2\n\tadcs\tx7, x7, xzr\n\tmul\tx4, x4, x2\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x6, x2\n\tadds\tx4, x4, x22\n\tmul\tx5, x5, x2\n\tumulh\tx22, x7, x2\n\tneg\tx28, x28\n\tmul\tx6, x6, x2\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x8, x2\n\tmul\tx7, x7, x2\n\tadcs\tx6, x6, x24\n\tmul\tx8, x8, x2\n\tadcs\tx7, x7, x25\n\tadcs\tx8, x8, x22\n\tadc\tx27, x23, x28\n\tldp\tx9, x10, [x1,#8*0+48]\t// load |a| (or |b|)\n\tasr\tx28, x16, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx11, x12, [x1,#8*2+48]\n\teor\tx2, x16, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx13, x14, 
[x1,#8*4+48]\n\n\teor\tx9, x9, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx10, x10, x28\n\tadds\tx9, x9, x28, lsr#63\n\teor\tx11, x11, x28\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x28\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x28\n\tumulh\tx22, x9, x2\n\tadcs\tx12, x12, xzr\n\tumulh\tx23, x10, x2\n\teor\tx14, x14, x28\n\tmul\tx9, x9, x2\n\tadcs\tx13, x13, xzr\n\tmul\tx10, x10, x2\n\tadc\tx14, x14, xzr\n\n\tumulh\tx24, x11, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x12, x2\n\tadds\tx10, x10, x22\n\tmul\tx11, x11, x2\n\tumulh\tx22, x13, x2\n\tneg\tx28, x28\n\tmul\tx12, x12, x2\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x14, x2\n\tmul\tx13, x13, x2\n\tadcs\tx12, x12, x24\n\tmul\tx14, x14, x2\n\tadcs\tx13, x13, x25\n\tadcs\tx14, x14, x22\n\tadc\tx28, x23, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x27,   x28\n\n\textr\tx3, x4, x3, #62\n\textr\tx4, x5, x4, #62\n\textr\tx5, x6, x5, #62\n\tasr\tx28, x9, #63\n\textr\tx6, x7, x6, #62\n\textr\tx7, x8, x7, #62\n\textr\tx8, x9, x8, #62\n\n\teor\tx3, x3, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x28\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x0,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x0,#8*4]\n\n\teor\tx15, x15, x28\n\teor\tx16, x16, x28\n\tsub\tx15, x15, x28\n\tsub\tx16, x16, x28\n\n\tret\n\n.def\t__ab_approximation_62;\n.type\t32;\n.endef\n.p2align\t4\n__ab_approximation_62:\n\tldp\tx7, x8, [x1,#8*4]\n\tldp\tx13, x14, [x1,#8*10]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx11, x12, [x1,#8*8]\n\n.Lab_approximation_62_loaded:\n\torr\tx22, x8, x14\t// check top-most limbs, ...\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx22, x8, x14\t// ... 
ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tldp\tx3, x4, [x1,#8*0]\n\tldp\tx9, x10, [x1,#8*6]\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx22, x8, x14\t// ... and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx22, x8, x14\n\tcsel\tx13, x13, x10, ne\n\n\tclz\tx22, x22\n\tcmp\tx22, #64\n\tcsel\tx22, x22, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx23, x22\n\n\tlslv\tx8, x8, x22\t// align high limbs to the left\n\tlslv\tx14, x14, x22\n\tlsrv\tx7, x7, x23\n\tlsrv\tx13, x13, x23\n\tand\tx7, x7, x23, asr#6\n\tand\tx13, x13, x23, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tb\t__inner_loop_62\n\tret\n\n.def\t__inner_loop_62;\n.type\t32;\n.endef\n.p2align\t4\n__inner_loop_62:\n\tmov\tx15, #1\t\t// |f0|=1\n\tmov\tx16, #0\t\t// |g0|=0\n\tmov\tx17, #0\t\t// |f1|=0\n\tmov\tx19, #1\t\t// |g1|=1\n\n.Loop_62:\n\tsbfx\tx28, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tsubs\tx24, x9, x3\t// |b_|-|a_|\n\tand\tx22, x9, x28\n\tsbc\tx25, x14, x8\n\tand\tx23, x14, x28\n\tsubs\tx26, x3, x22\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx22, x15\n\tsbcs\tx27, x8, x23\n\tmov\tx23, x16\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx14, x14, x8, hs\n\tcsel\tx3, x26, x24, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx8, x27, x25, hs\n\tcsel\tx15, x15, x17,       hs\t// exchange |f0| and |f1|\n\tcsel\tx17, x17, x22,     hs\n\tcsel\tx16, x16, x19,       hs\t// exchange |g0| and |g1|\n\tcsel\tx19, x19, x23,     hs\n\textr\tx3, x8, x3, #1\n\tlsr\tx8, x8, #1\n\tand\tx22, x17, x28\n\tand\tx23, x19, x28\n\tadd\tx17, x17, x17\t\t// |f1|<<=1\n\tadd\tx19, x19, x19\t\t// |g1|<<=1\n\tsub\tx15, x15, x22\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx16, x16, x23\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62\n\n\tret\n\n"
  },
  {
    "path": "build/coff/ct_is_square_mod_384-armv8.S",
    "content": ".text\n\n.globl\tct_is_square_mod_384\n\n.def\tct_is_square_mod_384;\n.type\t32;\n.endef\n.p2align\t5\nct_is_square_mod_384:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #512\n\n\tldp\tx3, x4, [x0,#8*0]\t\t// load input\n\tldp\tx5, x6, [x0,#8*2]\n\tldp\tx7, x8, [x0,#8*4]\n\n\tadd\tx0, sp, #255\t// find closest 256-byte-aligned spot\n\tand\tx0, x0, #-256\t// in the frame...\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\n\tldp\tx9, x10, [x1,#8*0]\t\t// load modulus\n\tldp\tx11, x12, [x1,#8*2]\n\tldp\tx13, x14, [x1,#8*4]\n\n\tstp\tx3, x4, [x0,#8*6]\t// copy input to |a|\n\tstp\tx5, x6, [x0,#8*8]\n\tstp\tx7, x8, [x0,#8*10]\n\tstp\tx9, x10, [x0,#8*0]\t// copy modulus to |b|\n\tstp\tx11, x12, [x0,#8*2]\n\tstp\tx13, x14, [x0,#8*4]\n\n\teor\tx2, x2, x2\t\t\t// init the .Legendre symbol\n\tmov\tx15, #24\t\t\t// 24 is 768/30-1\n\tb\t.Loop_is_square\n\n.p2align\t4\n.Loop_is_square:\n\tbl\t__ab_approximation_30\n\tsub\tx15, x15, #1\n\n\teor\tx1, x0, #128\t\t// pointer to dst |b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,csp,x1\n#endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\tx19, x16\t\t\t// |f0|\n\tmov\tx20, x17\t\t\t// |g0|\n\tadd\tx1,x1,#8*6\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\tx9, x10, [x1,#-8*6]\n\teor\tx0, x0, #128\t\t// flip-flop src |a|b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\tand\tx27, x27, x9\t\t// if |a| was negative,\n\tadd\tx2, x2, x27, lsr#1\t\t// adjust |L|\n\n\tcbnz\tx15, .Loop_is_square\n\n\t////////////////////////////////////////// last iteration\n\t//bl\t__ab_approximation_30\t\t// |a| and |b| are exact,\n\t//ldr\tx8, [x0,#8*6]\t\t// and loaded\n\t//ldr\tx14, [x0,#8*0]\n\tmov\tx15, #48\t\t\t// 48 is 768%30 
+ 30\n\tbl\t__inner_loop_48\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tand\tx0, x2, #1\n\teor\tx0, x0, #1\n\n\tadd\tsp, sp, #512\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__smul_384_n_shift_by_30;\n.type\t32;\n.endef\n.p2align\t5\n__smul_384_n_shift_by_30:\n\tldp\tx3, x4, [x0,#8*0+0]\t// load |b| (or |a|)\n\tasr\tx27, x20, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx5, x6, [x0,#8*2+0]\n\teor\tx20, x20, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx7, x8, [x0,#8*4+0]\n\n\teor\tx3, x3, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx20, x20, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tumulh\tx21, x3, x20\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x4, x20\n\teor\tx8, x8, x27\n\tumulh\tx23, x5, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x6, x20\n\tadc\tx8, x8, xzr\n\n\tumulh\tx25, x7, x20\n\tand\tx28, x20, x27\n\tumulh\tx26, x8, x20\n\tneg\tx28, x28\n\tmul\tx3, x3, x20\n\tmul\tx4, x4, x20\n\tmul\tx5, x5, x20\n\tadds\tx4, x4, x21\n\tmul\tx6, x6, x20\n\tadcs\tx5, x5, x22\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x23\n\tmul\tx8, x8, x20\n\tadcs\tx7, x7, x24\n\tadcs\tx8, x8 ,x25\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x0,#8*0+48]\t// load |b| (or |a|)\n\tasr\tx27, x19, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx11, x12, [x0,#8*2+48]\n\teor\tx19, x19, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx13, x14, [x0,#8*4+48]\n\n\teor\tx9, x9, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx19, x19, x27\n\teor\tx10, x10, x27\n\tadds\tx9, x9, x27, lsr#63\n\teor\tx11, x11, x27\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x27\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x27\n\tumulh\tx21, x9, 
x19\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x10, x19\n\teor\tx14, x14, x27\n\tumulh\tx23, x11, x19\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x12, x19\n\tadc\tx14, x14, xzr\n\n\tumulh\tx25, x13, x19\n\tand\tx28, x19, x27\n\tumulh\tx27, x14, x19\n\tneg\tx28, x28\n\tmul\tx9, x9, x19\n\tmul\tx10, x10, x19\n\tmul\tx11, x11, x19\n\tadds\tx10, x10, x21\n\tmul\tx12, x12, x19\n\tadcs\tx11, x11, x22\n\tmul\tx13, x13, x19\n\tadcs\tx12, x12, x23\n\tmul\tx14, x14, x19\n\tadcs\tx13, x13, x24\n\tadcs\tx14, x14 ,x25\n\tadc\tx27, x27, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x26,   x27\n\n\textr\tx3, x4, x3, #30\n\textr\tx4, x5, x4, #30\n\textr\tx5, x6, x5, #30\n\tasr\tx27, x9, #63\n\textr\tx6, x7, x6, #30\n\textr\tx7, x8, x7, #30\n\textr\tx8, x9, x8, #30\n\n\teor\tx3, x3, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x27\n\tstp\tx3, x4, [x1,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x1,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x1,#8*4]\n\n\tret\n\n.def\t__ab_approximation_30;\n.type\t32;\n.endef\n.p2align\t4\n__ab_approximation_30:\n\tldp\tx13, x14, [x0,#8*4]\t// |a| is still in registers\n\tldp\tx11, x12, [x0,#8*2]\n\n\torr\tx21, x8, x14\t// check top-most limbs, ...\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx21, x8, x14\t// ... ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx21, x8, x14\t// ... 
and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx21, x8, x14\t// and one more, ...\n\tcsel\tx13, x13, x10, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x3, ne\n\torr\tx21, x8, x14\n\tcsel\tx13, x13, x9, ne\n\n\tclz\tx21, x21\n\tcmp\tx21, #64\n\tcsel\tx21, x21, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx22, x21\n\n\tlslv\tx8, x8, x21\t// align high limbs to the left\n\tlslv\tx14, x14, x21\n\tlsrv\tx7, x7, x22\n\tlsrv\tx13, x13, x22\n\tand\tx7, x7, x22, asr#6\n\tand\tx13, x13, x22, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tbfxil\tx8, x3, #0, #32\n\tbfxil\tx14, x9, #0, #32\n\n\tb\t__inner_loop_30\n\tret\n\n\n.def\t__inner_loop_30;\n.type\t32;\n.endef\n.p2align\t4\n__inner_loop_30:\n\tmov\tx28, #30\n\tmov\tx17, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx20, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx27,#0x7FFFFFFF7FFFFFFF\n\n.Loop_30:\n\tsbfx\tx24, x8, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x8, x14\n\tsub\tx28, x28, #1\n\tand\tx21, x14, x24\n\n\tsub\tx22, x14, x8\t\t// |b_|-|a_|\n\tsubs\tx23, x8, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\t// L + (a_ & b_) >> 1\n\tmov\tx21, x20\n\tcsel\tx14, x14, x8, hs\t// |b_| = |a_|\n\tcsel\tx8, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx20, x20, x17,  hs\t// exchange |fg0| and |fg1|\n\tcsel\tx17, x17, x21, hs\n\tcsel\tx2,   x2,   x25, hs\n\tlsr\tx8, x8, #1\n\tand\tx21, x20, x24\n\tand\tx22, x27, x24\n\tadd\tx23, x14, #2\n\tsub\tx17, x17, x21\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx20, x20, x20\t// |f1|<<=1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\tadd\tx17, x17, x22\n\tsub\tx20, x20, x27\n\n\tcbnz\tx28, .Loop_30\n\n\tmov\tx27, #0x7FFFFFFF\n\tubfx\tx16, x17, #0, #32\n\tubfx\tx17, x17, #32, #32\n\tubfx\tx19, x20, 
#0, #32\n\tubfx\tx20, x20, #32, #32\n\tsub\tx16, x16, x27\t\t// remove the bias\n\tsub\tx17, x17, x27\n\tsub\tx19, x19, x27\n\tsub\tx20, x20, x27\n\n\tret\n\n.def\t__inner_loop_48;\n.type\t32;\n.endef\n.p2align\t4\n__inner_loop_48:\n.Loop_48:\n\tsbfx\tx24, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x3, x9\n\tsub\tx15, x15, #1\n\tand\tx21, x9, x24\n\tsub\tx22, x9, x3\t\t// |b_|-|a_|\n\tsubs\tx23, x3, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx3, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx2,   x2,   x25, hs\n\tadd\tx23, x9, #2\n\tlsr\tx3, x3, #1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\n\tcbnz\tx15, .Loop_48\n\n\tret\n\n"
  },
  {
    "path": "build/coff/ct_is_square_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tct_is_square_mod_384\n\n.def\tct_is_square_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nct_is_square_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_ct_is_square_mod_384:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$536,%rsp\n\n.LSEH_body_ct_is_square_mod_384:\n\n\n\tleaq\t24+255(%rsp),%rax\n\tandq\t$-256,%rax\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbx\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rdx\n\tmovq\t40(%rsi),%rdi\n\tmovq\t%rax,%rsi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rcx,72(%rax)\n\tmovq\t%rdx,80(%rax)\n\tmovq\t%rdi,88(%rax)\n\n\txorq\t%rbp,%rbp\n\tmovl\t$24,%ecx\n\tjmp\t.Loop_is_square\n\n.p2align\t5\n.Loop_is_square:\n\tmovl\t%ecx,16(%rsp)\n\n\tcall\t__ab_approximation_30\n\tmovq\t%rax,0(%rsp)\n\tmovq\t%rbx,8(%rsp)\n\n\tmovq\t$128+48,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovl\t16(%rsp),%ecx\n\txorq\t$128,%rsi\n\n\tandq\t48(%rdi),%r14\n\tshrq\t$1,%r14\n\taddq\t%r14,%rbp\n\n\tsubl\t$1,%ecx\n\tjnz\t.Loop_is_square\n\n\n\n\n\tmovq\t48(%rsi),%r9\n\tcall\t__inner_loop_48\n\n\tmovq\t$1,%rax\n\tandq\t%rbp,%rax\n\txorq\t$1,%rax\n\n\tleaq\t536(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_ct_is_sq
uare_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_ct_is_square_mod_384:\n\n.def\t__smulq_384_n_shift_by_30;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulq_384_n_shift_by_30:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,%r14\n\tandq\t%rbx,%r14\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,
%r15\n\tandq\t%rbx,%r15\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\tleaq\t-48(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$30,%r9,%r8\n\tshrdq\t$30,%r10,%r9\n\tshrdq\t$30,%r11,%r10\n\tshrdq\t$30,%r12,%r11\n\tshrdq\t$30,%r13,%r12\n\tshrdq\t$30,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbx,%rbx\n\tsubq\t%r14,%rbx\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__ab_approximation_30;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__ab_approximation_30:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t88(%rsi),%rbx\n\tmovq\t80(%rsi),%r15\n\tmovq\t72(%rsi),%r14\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r11,%r12\n\tmovq\t64(%rsi),%r11\n\tcmovzq\t%r14,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r10,%r12\n\tmovq\t56(%rsi),%r10\n\tcmovzq\t%r11,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r9,%r12\n\tmovq\t48(%rsi),%r9\n\tcmovzq\t%r10,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r8,%r12\n\tcmovzq\t%r9,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r13\n\tcmovzq\t%r9,%rbx\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%r12,%r13\n\tshldq\t%cl,%r15,%rbx\n\n\tmovq\t$0xFFFFFFFF00000000,%rax\n\tmovl\t%r8d,%r8d\n\tmovl\t%r9d,%r9d\n\tandq\t%rax,%r13\n\tandq\t%rax,%rbx\n\torq\t%r13,%r8\n\torq\t%rbx,%r9\n\n\tjmp\t__inner_loop_30\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__inner_loop_30;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__inner_loop_30:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rbx\n\tmovq\t$0x800000007FFFFFFF,%rcx\n\tleaq\t-1(%rbx),%r15\n\tmovl\t$30,%edi\n\n.Loop_30:\n\tmovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbx,%r12\n\tmovq\t%rcx,%r13\n\tmovq\t%rbp,%r14\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rcx,%rbx\n\tcmovbq\t%r12,%rcx\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\tsubq\t%rcx,%rbx\n\taddq\t%r15,%rbx\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbx\n\tcmovzq\t%r13,%rcx\n\tcmovzq\t%r14,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rcx,%rcx\n\tleaq\t(%rax,%rbp,1),%rbp\n\tsubq\t%r15,%rcx\n\n\tsubl\t$1,%edi\n\tjnz\t.Loop_30\n\n\tshrq\t$32,%r15\n\tmovl\t%ebx,%eax\n\tshrq\t$32,%rbx\n\tmovl\t%ecx,%edx\n\tshrq\t$32,%rcx\n\tsubq\t%r15,%rax\n\tsubq\t%r15,%rbx\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__inner_loop_48;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__inner_loop_48:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t$48,%edi\n\n.Loop_48:\n\tmovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbp,%r12\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rax,%rbp\n\n\tsubl\t$1,%edi\n\tjnz\t.Loop_48\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_ct_is_square_mod_384\n.rva\t.LSEH_body_ct_is_square_mod_384\n.rva\t.LSEH_info_ct_is_square_mod_384_prologue\n\n.rva\t.LSEH_body_ct_is_square_mod_384\n.rva\t.LSEH_epilogue_ct_is_square_mod_384\n.rva\t.LSEH_info_ct_is_square_mod_384_body\n\n.rva\t.LSEH_epilogue_ct_is_square_mod_384\n.rva\t.LSEH_end_ct_is_square_mod_384\n.rva\t.LSEH_info_ct_is_square_mod_384_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_ct_is_square_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_ct_is_square_mod_384_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x43,0x00\n.byte\t0x00,0xe4,0x44,0x00\n.byte\t0x00,0xd4,0x45,0x00\n.byte\t0x00,0xc4,0x46,0x00\n.byte\t0x00,0x34,0x47,0x00\n.byte\t0x00,0x54,0x48,0x00\n.byte\t0x00,0x74,0x4a,0x00\n.byte\t0x00,0x64,0x4b,0x00\n.byte\t0x00,0x01,0x49,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_ct_is_square_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/ctq_inverse_mod_384-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tct_inverse_mod_384\n\n.def\tct_inverse_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nct_inverse_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_ct_inverse_mod_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tct_inverse_mod_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$1112,%rsp\n\n.LSEH_body_ct_inverse_mod_384:\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\n\
tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approx
imation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n
\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%
rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t48(%rsi),%r10\n\tmovq\t56(%rsi),%r11\n\tcall\t__inner_loop_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r10,48(%rdi)\n\n\n\n\tleaq\t96(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\n\txorq\t$256+96,%rsi\n\tmovl\t$24,%edi\n\n\tmovq\t0(%rsi),%r8\n\txorq\t%r9,%r9\n\tmovq\t48(%rsi),%r10\n\txorq\t%r11,%r11\n\tcall\t__inner_loop_62\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t
$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_ct_inverse_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_ct_inverse_mod_384:\n.def\t__smulq_768x63;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulq_768x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmovq\t%r9,8(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmovq\t%r10,16(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%r11,24(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmovq\t%r12,32(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t
%rdx,%r14\n\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tmovq\t%rdx,%rsi\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rsi\n\taddq\t%rax,%rsi\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rbx\n\txorq\t%rdx,%rbp\n\txorq\t%rdx,%rcx\n\txorq\t%rdx,%rdi\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t$0,%rdi\n\n\tmulq\t%rsi\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rsi\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rsi\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rsi\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rsi\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\tmulq\t%rsi\n\taddq\t%rax,%rbx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\tmulq\t%rsi\n\taddq\t%rax,%rbp\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rdi\n\timulq\t%rsi\n\tmovq\t8(%rsp),%rsi\n\taddq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rsi),%r8\n\tadcq\t8(%rs
i),%r9\n\tadcq\t16(%rsi),%r10\n\tadcq\t24(%rsi),%r11\n\tadcq\t32(%rsi),%r12\n\tadcq\t40(%rsi),%r13\n\tadcq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%rdi\n\tadcq\t%rdi,%r15\n\tadcq\t%rdi,%rbx\n\tadcq\t%rdi,%rbp\n\tadcq\t%rdi,%rcx\n\tadcq\t%rdi,%rax\n\tadcq\t%rdi,%rdx\n\n\tleaq\t(%rsi),%rdi\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulq_384x63;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulq_384x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t56(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)
\n\tmovq\t%r13,%r15\n\tmovq\t%r14,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t-56(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulq_384_n_shift_by_62;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__smulq_384_n_shift_by_62:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r15\n\tnegq\t%r15\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%
rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\n\tleaq\t-48(%rsi),%rsi\n\tmovq\t%rbx,%rdx\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$62,%r9,%r8\n\tshrdq\t$62,%r10,%r9\n\tshrdq\t$62,%r11,%r10\n\tshrdq\t$62,%r12,%r11\n\tshrdq\t$62,%r13,%r12\n\tshrdq\t$62,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__ab_approximation_62;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__ab_approximation_62:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t16(%rsi),%r8\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t0(%rsi),%r8\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tjmp\t__inner_loop_62\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__inner_loop_62;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t3\n.long\t0\n__inner_loop_62:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\tmovq\t%rsi,8(%rsp)\n\n.Loop_62:\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbp\n\tmovq\t%r11,%r14\n\tcmovnzq\t%r10,%rax\n\tcmovnzq\t%r11,%rbx\n\tsubq\t%r8,%rbp\n\tsbbq\t%r9,%r14\n\tmovq\t%r8,%r15\n\tmovq\t%r9,%rsi\n\tsubq\t%rax,%r8\n\tsbbq\t%rbx,%r9\n\tcmovcq\t%rbp,%r8\n\tcmovcq\t%r14,%r9\n\tcmovcq\t%r15,%r10\n\tcmovcq\t%rsi,%r11\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrdq\t$1,%r9,%r8\n\tshrq\t$1,%r9\n\ttestq\t$1,%r15\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\t.Loop_62\n\n\tmovq\t8(%rsp),%rsi\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_ct_inverse_mod_384\n.rva\t.LSEH_body_ct_inverse_mod_384\n.rva\t.LSEH_info_ct_inverse_mod_384_prologue\n\n.rva\t.LSEH_body_ct_inverse_mod_384\n.rva\t.LSEH_epilogue_ct_inverse_mod_384\n.rva\t.LSEH_info_ct_inverse_mod_384_body\n\n.rva\t.LSEH_epilogue_ct_inverse_mod_384\n.rva\t.LSEH_end_ct_inverse_mod_384\n.rva\t.LSEH_info_ct_inverse_mod_384_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_ct_inverse_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_ct_inverse_mod_384_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x8b,0x00\n.byte\t0x00,0xe4,0x8c,0x00\n.byte\t0x00,0xd4,0x8d,0x00\n.byte\t0x00,0xc4,0x8e,0x00\n.byte\t0x00,0x34,0x8f,0x00\n.byte\t0x00,0x54,0x90,0x00\n.byte\t0x00,0x74,0x92,0x00\n.byte\t0x00,0x64,0x93,0x00\n.byte\t0x00,0x01,0x91,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_ct_inverse_mod_384_epilogue
:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/ctx_inverse_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tctx_inverse_mod_384\n\n.def\tctx_inverse_mod_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nctx_inverse_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_ctx_inverse_mod_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nct_inverse_mod_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$1112,%rsp\n\n.LSEH_body_ctx_inverse_mod_384:\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\n\tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%
rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(
%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tl
eaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\
t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\
n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r
13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rc
x\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$55,%edi\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t48(%rsi),%r10\n\n\tcall\t__tail_loop_55\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulx_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t
%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_ctx_inverse_mod_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_ctx_inverse_mod_384:\n.def\t__smulx_768x63;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__smulx_768x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\tmovq\t%rcx,%rax\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tsarq\t$63,%rax\n\txorq\t%rsi,%rsi\n\tsubq\t%rax,%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rsi,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\txorq\t%rax,%r15\n\txorq\t%rax,%rbx\n\txorq\t%rax,%rbp\n\txorq\t%rax,%rcx\n\txorq\t%rdi,%rax\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t
$0,%rax\n\n\tmulxq\t%r8,%r8,%rsi\n\tmulxq\t%r9,%r9,%rdi\n\taddq\t%rsi,%r9\n\tmulxq\t%r10,%r10,%rsi\n\tadcq\t%rdi,%r10\n\tmulxq\t%r11,%r11,%rdi\n\tadcq\t%rsi,%r11\n\tmulxq\t%r12,%r12,%rsi\n\tadcq\t%rdi,%r12\n\tmulxq\t%r13,%r13,%rdi\n\tadcq\t%rsi,%r13\n\tmulxq\t%r14,%r14,%rsi\n\tadcq\t%rdi,%r14\n\tmulxq\t%r15,%r15,%rdi\n\tadcq\t%rsi,%r15\n\tmulxq\t%rbx,%rbx,%rsi\n\tadcq\t%rdi,%rbx\n\tmulxq\t%rbp,%rbp,%rdi\n\tadcq\t%rsi,%rbp\n\tmulxq\t%rcx,%rcx,%rsi\n\tadcq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rsi\n\timulq\t%rdx\n\taddq\t%rsi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%rsi\n\tadcq\t%rsi,%r15\n\tadcq\t%rsi,%rbx\n\tadcq\t%rsi,%rbp\n\tadcq\t%rsi,%rcx\n\tadcq\t%rsi,%rax\n\tadcq\t%rsi,%rdx\n\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulx_384x63;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__smulx_384x63:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\tmovq\t0+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tmovq\t%rcx,%rdx\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,%r15\n\tmovq\t%r14,%rbx\n\tmovq\t56+0(%rsi),%r8\n\tmovq\t56+8(%rsi),%r9\n\tmovq\t56+16(%rsi),%r10\n\tmovq\t56+24(%rsi),%r11\n\tmovq\t56+32(%rsi),%r12\n\tmovq\t56+40(%rsi),%r13\n\tmovq\t56+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t2
4(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulx_384_n_shift_by_31;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__smulx_384_n_shift_by_31:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,%r15\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp
\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__smulx_191_n_shift_by_31;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__smulx_191_n_shift_by_31:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%r10,%rax\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r10\n\taddq\t%rbp,%r9\n\tadcq\t$0,%r10\n\timulq\t%rdx\n\taddq\t%rax,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmovq\t%rcx,%rdx\n\tmovq\t48+0(%rsi),%r11\n\tmovq\t48+8(%rsi),%r12\n\tmovq\t48+16(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%r13,%rax\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%rax\n\n\tmulxq\t%r11,%r11,%rbp\n\tmulxq\t%r12,%r12,%r13\n\taddq\t%rbp,%r12\n\tadcq\t$0,%r13\n\timulq\t%rdx\n\taddq\t%rax,%r13\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t%r9,%r12\n\tadcq\t%r10,%r13\n\tadcq\t%rdx,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r11,0(%rdi)\n\tmovq\t%r12,8(%rdi)\n\tmovq\t%r13,16(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__ab_approximation_31;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__ab_approximation_31:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t16(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t8(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tandnq\t%r9,%rax,%r9\n\tandnq\t%r11,%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.def\t__inner_loop_31;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__inner_loop_31:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\n.Loop_31:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edi\n\tjnz\t.Loop_31\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__tail_loop_55;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__tail_loop_55:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\n.Loop_55:\n\txorq\t%rax,%rax\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t$1,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\t.Loop_55\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_ctx_inverse_mod_384\n.rva\t.LSEH_body_ctx_inverse_mod_384\n.rva\t.LSEH_info_ctx_inverse_mod_384_prologue\n\n.rva\t.LSEH_body_ctx_inverse_mod_384\n.rva\t.LSEH_epilogue_ctx_inverse_mod_384\n.rva\t.LSEH_info_
ctx_inverse_mod_384_body\n\n.rva\t.LSEH_epilogue_ctx_inverse_mod_384\n.rva\t.LSEH_end_ctx_inverse_mod_384\n.rva\t.LSEH_info_ctx_inverse_mod_384_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_ctx_inverse_mod_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_ctx_inverse_mod_384_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x8b,0x00\n.byte\t0x00,0xe4,0x8c,0x00\n.byte\t0x00,0xd4,0x8d,0x00\n.byte\t0x00,0xc4,0x8e,0x00\n.byte\t0x00,0x34,0x8f,0x00\n.byte\t0x00,0x54,0x90,0x00\n.byte\t0x00,0x74,0x92,0x00\n.byte\t0x00,0x64,0x93,0x00\n.byte\t0x00,0x01,0x91,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_ctx_inverse_mod_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/div3w-armv8.S",
    "content": ".text\n\n.globl\tdiv_3_limbs\n\n.def\tdiv_3_limbs;\n.type\t32;\n.endef\n.p2align\t5\ndiv_3_limbs:\n\thint\t#34\n\tldp\tx4,x5,[x0]\t// load R\n\teor\tx0,x0,x0\t// Q = 0\n\tmov\tx3,#64\t\t// loop counter\n\tnop\n\n.Loop:\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tcsel\tx4,x4,x6,lo\t// select between R and R - D\n\textr\tx1,x2,x1,#1\t// D >>= 1\n\tcsel\tx5,x5,x7,lo\n\tlsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\tsub\tx3,x3,#1\n\tcbnz\tx3,.Loop\n\n\tasr\tx3,x0,#63\t// top bit -> mask\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\n\torr\tx0,x0,x3\t// all ones if overflow\n\n\tret\n\n.globl\tquot_rem_128\n\n.def\tquot_rem_128;\n.type\t32;\n.endef\n.p2align\t5\nquot_rem_128:\n\thint\t#34\n\tldp\tx3,x4,[x1]\n\n\tmul\tx5,x3,x2\t// divisor[0:1} * quotient\n\tumulh\tx6,x3,x2\n\tmul\tx11,  x4,x2\n\tumulh\tx7,x4,x2\n\n\tldp\tx8,x9,[x0]\t// load 3 limbs of the dividend\n\tldr\tx10,[x0,#16]\n\n\tadds\tx6,x6,x11\n\tadc\tx7,x7,xzr\n\n\tsubs\tx8,x8,x5\t// dividend - divisor * quotient\n\tsbcs\tx9,x9,x6\n\tsbcs\tx10,x10,x7\n\tsbc\tx5,xzr,xzr\t\t// borrow -> mask\n\n\tadd\tx2,x2,x5\t// if borrowed, adjust the quotient ...\n\tand\tx3,x3,x5\n\tand\tx4,x4,x5\n\tadds\tx8,x8,x3\t// ... and add divisor\n\tadc\tx9,x9,x4\n\n\tstp\tx8,x9,[x0]\t// save 2 limbs of the remainder\n\tstr\tx2,[x0,#16]\t// and one limb of the quotient\n\n\tmov\tx0,x2\t\t// return adjusted quotient\n\n\tret\n\n\n.globl\tquot_rem_64\n\n.def\tquot_rem_64;\n.type\t32;\n.endef\n.p2align\t5\nquot_rem_64:\n\thint\t#34\n\tldr\tx3,[x1]\n\tldr\tx8,[x0]\t// load 1 limb of the dividend\n\n\tmul\tx5,x3,x2\t// divisor * quotient\n\n\tsub\tx8,x8,x5\t// dividend - divisor * quotient\n\n\tstp\tx8,x2,[x0]\t// save remainder and quotient\n\n\tmov\tx0,x2\t\t// return quotient\n\n\tret\n\n"
  },
  {
    "path": "build/coff/div3w-x86_64.s",
    "content": ".text\t\n\n.globl\tdiv_3_limbs\n\n.def\tdiv_3_limbs;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\ndiv_3_limbs:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_div_3_limbs:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n.LSEH_body_div_3_limbs:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\txorq\t%rax,%rax\n\tmovl\t$64,%ecx\n\n.Loop:\n\tmovq\t%r8,%r10\n\tsubq\t%rsi,%r8\n\tmovq\t%r9,%r11\n\tsbbq\t%rdx,%r9\n\tleaq\t1(%rax,%rax,1),%rax\n\tmovq\t%rdx,%rdi\n\tcmovcq\t%r10,%r8\n\tcmovcq\t%r11,%r9\n\tsbbq\t$0,%rax\n\tshlq\t$63,%rdi\n\tshrq\t$1,%rsi\n\tshrq\t$1,%rdx\n\torq\t%rdi,%rsi\n\tsubl\t$1,%ecx\n\tjnz\t.Loop\n\n\tleaq\t1(%rax,%rax,1),%rcx\n\tsarq\t$63,%rax\n\n\tsubq\t%rsi,%r8\n\tsbbq\t%rdx,%r9\n\tsbbq\t$0,%rcx\n\n\torq\t%rcx,%rax\n\n.LSEH_epilogue_div_3_limbs:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_div_3_limbs:\n.globl\tquot_rem_128\n\n.def\tquot_rem_128;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nquot_rem_128:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_quot_rem_128:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n.LSEH_body_quot_rem_128:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t0(%rsi)\n\tmovq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r9\n\tadcq\t$0,%rdx\n\n\tmovq\t0(%rdi),%r10\n\tmovq\t8(%rdi),%r11\n\tmovq\t16(%rdi),%rax\n\n\tsubq\t%r8,%r10\n\tsbbq\t%r9,%r11\n\tsbbq\t%rdx,%rax\n\tsbbq\t%r8,%r8\n\n\taddq\t%r8,%rcx\n\tmovq\t%r8,%r9\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\taddq\t%r8,%r10\n\tadcq\t%r9,%r11\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%r11,8(%rdi)\n\tmovq\t%rcx,16(%rdi)\n\n\tmovq\t%rcx,%rax\n\n.LSEH_epilogue_quot_rem_128:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_quot_rem_128:\n\n\n\n\n\n.globl\tquot_rem_64\n\n.def\tquot_rem_64;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nquot_rem_64:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_quot_rem_64:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n.LSEH_body_quot_rem_64:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,%rax\n\timulq\t0(%rsi),%rdx\n\n\tmovq\t0(%rdi),%r10\n\n\tsubq\t%rdx,%r10\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%rax,8(%rdi)\n\n.LSEH_epilogue_quot_rem_64:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_quot_rem_64:\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_div_3_limbs\n.rva\t.LSEH_body_div_3_limbs\n.rva\t.LSEH_info_div_3_limbs_prologue\n\n.rva\t.LSEH_body_div_3_limbs\n.rva\t.LSEH_epilogue_div_3_limbs\n.rva\t.LSEH_info_div_3_limbs_body\n\n.rva\t.LSEH_epilogue_div_3_limbs\n.rva\t.LSEH_end_div_3_limbs\n.rva\t.LSEH_info_div_3_limbs_epilogue\n\n.rva\t.LSEH_begin_quot_rem_128\n.rva\t.LSEH_body_quot_rem_128\n.rva\t.LSEH_info_quot_rem_128_prologue\n\n.rva\t.LSEH_body_quot_rem_128\n.rva\t.LSEH_epilogue_quot_rem_128\n.rva\t.LSEH_info_quot_rem_128_body\n\n.rva\t.LSEH_epilogue_quot_rem_128\n.rva\t.LSEH_end_quot_rem_128\n.rva\t.LSEH_info_quot_rem_128_epilogue\n\n.rva\t.LSEH_begin_quot_rem_64\n.rva\t.LSEH_body_quot_rem_64\n.rva\t.LSEH_info_quot_rem_64_prologue\n\n.rva\t.LSEH_body_quot_rem_64\n.rva\t.LSEH_epilogue_quot_rem_64\n.rva\t.LSEH_info_quot_rem_64_body\n\n.rva\t.LSEH_epilogue_quot_rem_64\n.rva\t.LSEH_end_quot_rem_64\n.rva\t.LSEH_info_quot_rem_64_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_div_3_limbs_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_div_3_limbs_body:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_div_3_limbs_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0
x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_quot_rem_128_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_quot_rem_128_body:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_quot_rem_128_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_quot_rem_64_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_quot_rem_64_body:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_quot_rem_64_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/mul_mont_256-armv8.S",
    "content": ".text\n\n.globl\tmul_mont_sparse_256\n\n.def\tmul_mont_sparse_256;\n.type\t32;\n.endef\n.p2align\t5\nmul_mont_sparse_256:\n\thint\t#34\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldr\tx9,        [x2]\n\tldp\tx12,x13,[x1,#16]\n\n\tmul\tx19,x10,x9\n\tldp\tx5,x6,[x3]\n\tmul\tx20,x11,x9\n\tldp\tx7,x8,[x3,#16]\n\tmul\tx21,x12,x9\n\tmul\tx22,x13,x9\n\n\tumulh\tx14,x10,x9\n\tumulh\tx15,x11,x9\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tumulh\tx17,x13,x9\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,xzr,    x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*1]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*2]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x1
9,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*3]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tadcs\tx20,x21,x15\n\tadcs\tx21,x22,x16\n\tadcs\tx22,x23,x17\n\tadc\tx23,xzr,xzr\n\n\tsubs\tx14,x19,x5\n\tsbcs\tx15,x20,x6\n\tsbcs\tx16,x21,x7\n\tsbcs\tx17,x22,x8\n\tsbcs\txzr,    
x23,xzr\n\n\tcsel\tx19,x19,x14,lo\n\tcsel\tx20,x20,x15,lo\n\tcsel\tx21,x21,x16,lo\n\tcsel\tx22,x22,x17,lo\n\n\tstp\tx19,x20,[x0]\n\tstp\tx21,x22,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\tret\n\n.globl\tsqr_mont_sparse_256\n\n.def\tsqr_mont_sparse_256;\n.type\t32;\n.endef\n.p2align\t5\nsqr_mont_sparse_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tmov\tx4,x3\n\n\t////////////////////////////////////////////////////////////////\n\t//  |  |  |  |  |  |a1*a0|  |\n\t//  |  |  |  |  |a2*a0|  |  |\n\t//  |  |a3*a2|a3*a0|  |  |  |\n\t//  |  |  |  |a2*a1|  |  |  |\n\t//  |  |  |a3*a1|  |  |  |  |\n\t// *|  |  |  |  |  |  |  | 2|\n\t// +|a3*a3|a2*a2|a1*a1|a0*a0|\n\t//  |--+--+--+--+--+--+--+--|\n\t//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10\n\t//\n\t//  \"can't overflow\" below mark carrying into high part of\n\t//  multiplication result, which can't overflow, because it\n\t//  can never be all ones.\n\n\tmul\tx11,x6,x5\t// a[1]*a[0]\n\tumulh\tx15,x6,x5\n\tmul\tx12,x7,x5\t// a[2]*a[0]\n\tumulh\tx16,x7,x5\n\tmul\tx13,x8,x5\t// a[3]*a[0]\n\tumulh\tx19,x8,x5\n\n\tadds\tx12,x12,x15\t// accumulate high parts of multiplication\n\tmul\tx14,x7,x6\t// a[2]*a[1]\n\tumulh\tx15,x7,x6\n\tadcs\tx13,x13,x16\n\tmul\tx16,x8,x6\t// a[3]*a[1]\n\tumulh\tx17,x8,x6\n\tadc\tx19,x19,xzr\t// can't overflow\n\n\tmul\tx20,x8,x7\t// a[3]*a[2]\n\tumulh\tx21,x8,x7\n\n\tadds\tx15,x15,x16\t// accumulate high parts of multiplication\n\tmul\tx10,x5,x5\t// a[0]*a[0]\n\tadc\tx16,x17,xzr\t// can't overflow\n\n\tadds\tx13,x13,x14\t// accumulate low parts of multiplication\n\tumulh\tx5,x5,x5\n\tadcs\tx19,x19,x15\n\tmul\tx15,x6,x6\t// a[1]*a[1]\n\tadcs\tx20,x20,x16\n\tumulh\tx6,x6,x6\n\tadc\tx21,x21,xzr\t// 
can't overflow\n\n\tadds\tx11,x11,x11\t// acc[1-6]*=2\n\tmul\tx16,x7,x7\t// a[2]*a[2]\n\tadcs\tx12,x12,x12\n\tumulh\tx7,x7,x7\n\tadcs\tx13,x13,x13\n\tmul\tx17,x8,x8\t// a[3]*a[3]\n\tadcs\tx19,x19,x19\n\tumulh\tx8,x8,x8\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadc\tx22,xzr,xzr\n\n\tadds\tx11,x11,x5\t// +a[i]*a[i]\n\tadcs\tx12,x12,x15\n\tadcs\tx13,x13,x6\n\tadcs\tx19,x19,x16\n\tadcs\tx20,x20,x7\n\tadcs\tx21,x21,x17\n\tadc\tx22,x22,x8\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadds\tx10,x10,x19\t// accumulate upper half\n\tadcs\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadc\tx19,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x19,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n.globl\tfrom_mont_256\n\n.def\tfrom_mont_256;\n.type\t32;\n.endef\n.p2align\t5\nfrom_mont_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tredc_mont_256\n\n.def\tredc_mont_256;\n.type\t32;\n.endef\n.p2align\t5\nredc_mont_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx16,x17,[x1,#48]\n\n\tadds\
tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadcs\tx12,x12,x16\n\tadcs\tx13,x13,x17\n\tadc\tx9,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x9,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__mul_by_1_mont_256;\n.type\t32;\n.endef\n.p2align\t5\n__mul_by_1_mont_256:\n\tmul\tx3,x4,x10\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\
n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tadc\tx13,x9,x17\n\n\tret\n\n"
  },
  {
    "path": "build/coff/mul_mont_384-armv8.S",
    "content": ".text\n\n.globl\tadd_mod_384x384\n\n.def\tadd_mod_384x384;\n.type\t32;\n.endef\n.p2align\t5\nadd_mod_384x384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__add_mod_384x384;\n.type\t32;\n.endef\n.p2align\t5\n__add_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tadcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tadcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tadcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tstp\tx11,x12,[x0,#48]\n\tcsel\tx15,x15,x23,lo\n\tstp\tx13,x14,[x0,#64]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\n\n.globl\tsub_mod_384x384\n\n.def\tsub_mod_384x384;\n.type\t32;\n.endef\n.p2align\t5\nsub_mod_384x384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__sub_mod_384x384;\n.type\t32;\n.endef\n.p2align\t5\n__sub_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tsbcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tsbcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tsbcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\n\n.def\t__add_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__add_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tstp\tx11,x12,[x0]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n.def\t__sub_mod_384;\n.type\t32;\n.endef\n.p2align\t5\n__sub_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n.globl\tmul_mont_384x\n\n.def\tmul_mont_384x;\n.type\t32;\n.endef\n.p2align\t5\nmul_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#288\t\t// space for 3 768-bit vectors\n\n\tmov\tx26,x0\t\t// save r_ptr\n\tmov\tx27,x1\t\t// save a_ptr\n\tmov\tx28,x2\t\t// save b_ptr\n\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tadd\tx1,x1,#48\n\tadd\tx2,x2,#48\n\tadd\tx0,sp,#96\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tsub\tx2,x1,#48\n\tadd\tx0,sp,#240\n\tbl\t__add_mod_384\n\n\tadd\tx1,x28,#0\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#192\n\tbl\t__add_mod_384\n\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\t\t// mul_384(t2, a->re+a->im, b->re+b->im)\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,x0\n\tadd\tx2,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,sp,#96\n\tbl\t__sub_mod_384x384\t// t2 = t2-t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#96\n\tadd\tx0,sp,#0\n\tbl\t__sub_mod_384x384\t// t0 = 
t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tadd\tx1,sp,#192\n\tadd\tx0,x0,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#288\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsqr_mont_384x\n\n.def\tsqr_mont_384x;\n.type\t32;\n.endef\n.p2align\t5\nsqr_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 2 384-bit vectors\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tadd\tx0,sp,#0\n\tbl\t__add_mod_384\t\t// t0 = a->re + a->im\n\n\tadd\tx0,sp,#48\n\tbl\t__sub_mod_384\t\t// t1 = a->re - a->im\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx11,x11,x11\t// add with itself\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx19,x11,x19,lo\n\tcsel\tx20,x12,x20,lo\n\tcsel\tx21,x13,x21,lo\n\tldp\tx11,x12,[sp]\n\tcsel\tx22,x14,x22,lo\n\tldr\tx17,        
[sp,#48]\n\tcsel\tx23,x15,x23,lo\n\tldp\tx13,x14,[sp,#16]\n\tcsel\tx24,x16,x24,lo\n\tldp\tx15,x16,[sp,#32]\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tmul_mont_384\n\n.def\tmul_mont_384;\n.type\t32;\n.endef\n.p2align\t5\nmul_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        
[x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__mul_mont_384;\n.type\t32;\n.endef\n.p2align\t5\n__mul_mont_384:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    x3\n\tmul\tx3,x10,x4\n\tmov\tx17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*1]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*2]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*3]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*4]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*5]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t// pull r_ptr\n\tadc\tx17,x17,xzr\n\n\tadds\tx19,x20,x26\n\tadcs\tx20,x21,x27\n\tadcs\tx21,x22,x28\n\tadcs\tx22,x23,x0\n\tadcs\tx23,x24,x1\n\tadcs\tx24,x25,x3\n\tadc\tx25,x17,xzr\n\n\tsubs\tx26,x19,x5\n\tsbcs\tx27,x20,x6\n\tsbcs\tx28,x21,x7\n\tsbcs\tx0,x22,x8\n\tsbcs\tx1,x23,x9\n\tsbcs\tx3,x24,x10\n\tsbcs\txzr,    
x25,xzr\n\n\tcsel\tx11,x19,x26,lo\n\tcsel\tx12,x20,x27,lo\n\tcsel\tx13,x21,x28,lo\n\tcsel\tx14,x22,x0,lo\n\tcsel\tx15,x23,x1,lo\n\tcsel\tx16,x24,x3,lo\n\tret\n\n\n.globl\tsqr_mont_384\n\n.def\tsqr_mont_384;\n.type\t32;\n.endef\n.p2align\t5\nsqr_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tmov\tx3,x0\t\t// save r_ptr\n\tmov\tx0,sp\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tmov\tx1,sp\n\tmov\tx0,x3\t\t// restore r_ptr\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsqr_n_mul_mont_383\n\n.def\tsqr_n_mul_mont_383;\n.type\t32;\n.endef\n.p2align\t5\nsqr_n_mul_mont_383:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx17,x5\t\t\t// save 
b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\tmov\tx0,sp\n.Loop_sqr_383:\n\tbl\t__sqr_384\n\tsub\tx2,x2,#1\t// counter\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,sp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// just accumulate upper half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tcbnz\tx2,.Loop_sqr_383\n\n\tmov\tx2,x17\n\tldr\tx17,[x17]\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n.def\t__sqr_384;\n.type\t32;\n.endef\n.p2align\t5\n__sqr_384:\n\tmul\tx19,x12,x11\n\tmul\tx20,x13,x11\n\tmul\tx21,x14,x11\n\tmul\tx22,x15,x11\n\tmul\tx23,x16,x11\n\n\tumulh\tx6,x12,x11\n\tumulh\tx7,x13,x11\n\tumulh\tx8,x14,x11\n\tumulh\tx9,x15,x11\n\tadds\tx20,x20,x6\n\tumulh\tx10,x16,x11\n\tadcs\tx21,x21,x7\n\tmul\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tmul\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tmul\tx9,x15,x12\n\tadc\tx24,xzr,    x10\n\tmul\tx10,x16,x12\n\n\tadds\tx21,x21,x7\n\tumulh\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x12\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x12\n\tadc\tx25,xzr,xzr\n\n\tmul\tx5,x11,x11\n\tadds\tx22,x22,x7\n\tumulh\tx11,  
x11,x11\n\tadcs\tx23,x23,x8\n\tmul\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tmul\tx9,x15,x13\n\tadc\tx25,x25,x10\n\tmul\tx10,x16,x13\n\n\tadds\tx23,x23,x8\n\tumulh\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tumulh\tx9,x15,x13\n\tadcs\tx25,x25,x10\n\tumulh\tx10,x16,x13\n\tadc\tx26,xzr,xzr\n\n\tmul\tx6,x12,x12\n\tadds\tx24,x24,x8\n\tumulh\tx12,  x12,x12\n\tadcs\tx25,x25,x9\n\tmul\tx9,x15,x14\n\tadc\tx26,x26,x10\n\tmul\tx10,x16,x14\n\n\tadds\tx25,x25,x9\n\tumulh\tx9,x15,x14\n\tadcs\tx26,x26,x10\n\tumulh\tx10,x16,x14\n\tadc\tx27,xzr,xzr\n\tmul\tx7,x13,x13\n\tadds\tx26,x26,x9\n\tumulh\tx13,  x13,x13\n\tadc\tx27,x27,x10\n\tmul\tx8,x14,x14\n\n\tmul\tx10,x16,x15\n\tumulh\tx14,  x14,x14\n\tadds\tx27,x27,x10\n\tumulh\tx10,x16,x15\n\tmul\tx9,x15,x15\n\tadc\tx28,x10,xzr\n\n\tadds\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadcs\tx22,x22,x22\n\tadcs\tx23,x23,x23\n\tadcs\tx24,x24,x24\n\tadcs\tx25,x25,x25\n\tadcs\tx26,x26,x26\n\tumulh\tx15,  x15,x15\n\tadcs\tx27,x27,x27\n\tmul\tx10,x16,x16\n\tadcs\tx28,x28,x28\n\tumulh\tx16,  
x16,x16\n\tadc\tx1,xzr,xzr\n\n\tadds\tx19,x19,x11\n\tadcs\tx20,x20,x6\n\tadcs\tx21,x21,x12\n\tadcs\tx22,x22,x7\n\tadcs\tx23,x23,x13\n\tadcs\tx24,x24,x8\n\tadcs\tx25,x25,x14\n\tstp\tx5,x19,[x0]\n\tadcs\tx26,x26,x9\n\tstp\tx20,x21,[x0,#16]\n\tadcs\tx27,x27,x15\n\tstp\tx22,x23,[x0,#32]\n\tadcs\tx28,x28,x10\n\tstp\tx24,x25,[x0,#48]\n\tadc\tx16,x16,x1\n\tstp\tx26,x27,[x0,#64]\n\tstp\tx28,x16,[x0,#80]\n\n\tret\n\n.globl\tsqr_384\n\n.def\tsqr_384;\n.type\t32;\n.endef\n.p2align\t5\nsqr_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tredc_mont_384\n\n.def\tredc_mont_384;\n.type\t32;\n.endef\n.p2align\t5\nredc_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing 
b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tfrom_mont_384\n\n.def\tfrom_mont_384;\n.type\t32;\n.endef\n.p2align\t5\nfrom_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__mul_by_1_mont_384;\n.type\t32;\n.endef\n.p2align\t5\n__mul_by_1_mont_384:\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tmul\tx26,x4,x11\n\tldp\tx15,x16,[x1,#32]\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\tret\n\n\n.def\t__redc_tail_mont_384;\n.type\t32;\n.endef\n.p2align\t5\n__redc_tail_mont_384:\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// accumulate upper 
half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n.globl\tmul_384\n\n.def\tmul_384;\n.type\t32;\n.endef\n.p2align\t5\nmul_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__mul_384;\n.type\t32;\n.endef\n.p2align\t5\n__mul_384:\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\n\tumulh\tx5,x11,x17\n\tumulh\tx6,x12,x17\n\tumulh\tx7,x13,x17\n\tumulh\tx8,x14,x17\n\tumulh\tx9,x15,x17\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,8*1]\n\n\tstr\tx19,[x0]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,xzr,    
x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(1+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*1]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(2+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*2]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(3+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*3]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(4+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*4]\n\t
adds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*5]\n\tadds\tx19,x20,x5\n\tadcs\tx20,x21,x6\n\tadcs\tx21,x22,x7\n\tadcs\tx22,x23,x8\n\tadcs\tx23,x24,x9\n\tadc\tx24,x25,x10\n\n\tstp\tx19,x20,[x0,#48]\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tret\n\n\n.globl\tmul_382x\n\n.def\tmul_382x;\n.type\t32;\n.endef\n.p2align\t5\nmul_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for two 384-bit vectors\n\n\tldp\tx11,x12,[x1]\n\tmov\tx26,x0\t\t// save r_ptr\n\tldp\tx19,x20,[x1,#48]\n\tmov\tx27,x1\t\t// save a_ptr\n\tldp\tx13,x14,[x1,#16]\n\tmov\tx28,x2\t\t// save b_ptr\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx15,x16,[x1,#32]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx6,x12,x20\n\tldp\tx11,x12,[x2]\n\tadcs\tx7,x13,x21\n\tldp\tx19,x20,[x2,#48]\n\tadcs\tx8,x14,x22\n\tldp\tx13,x14,[x2,#16]\n\tadcs\tx9,x15,x23\n\tldp\tx21,x22,[x2,#64]\n\tadc\tx10,x16,x24\n\tldp\tx15,x16,[x2,#32]\n\n\tstp\tx5,x6,[sp]\n\tadds\tx5,x11,x19\t// t1 = b->re + b->im\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx6,x12,x20\n\tstp\tx7,x8,[sp,#16]\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tstp\tx9,x10,[sp,#32]\n\tadcs\tx9,x15,x23\n\tstp\tx5,x6,[sp,#48]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[sp,#64]\n\tstp\tx9,x10,[sp,#80]\n\n\tbl\t__mul_384\t\t// 
mul_384(ret->re, a->re, b->re)\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#48\n\tadd\tx0,x26,#96\n\tbl\t__mul_384\n\n\tadd\tx1,x27,#48\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tadd\tx1,x26,#96\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,x26,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,x26,#0\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsqr_382x\n\n.def\tsqr_382x;\n.type\t32;\n.endef\n.p2align\t5\nsqr_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx13,x14,[x1,#16]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx21,x22,[x1,#64]\n\tadcs\tx6,x12,x20\n\tldp\tx15,x16,[x1,#32]\n\tadcs\tx7,x13,x21\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx8,x14,x22\n\tstp\tx5,x6,[x0]\n\tadcs\tx9,x15,x23\n\tldp\tx5,x6,[x2]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[x0,#16]\n\n\tsubs\tx11,x11,x19\t// t1 = a->re - 
a->im\n\tldp\tx7,x8,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tstp\tx9,x10,[x0,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx9,x10,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x25\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x25\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x25\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x25\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tmov\tx4,x1\t\t// save a_ptr\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\n\n\tadd\tx1,x4,#0\n\tadd\tx2,x4,#48\n\tadd\tx0,x0,#96\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x0]\n\tldp\tx13,x14,[x0,#16]\n\tadds\tx11,x11,x11\t// add with itself\n\tldp\tx15,x16,[x0,#32]\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadcs\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tstp\tx11,x12,[x0]\n\tadcs\tx21,x21,x21\n\tstp\tx13,x14,[x0,#16]\n\tadcs\tx22,x22,x22\n\tstp\tx15,x16,[x0,#32]\n\tadcs\tx23,x23,x23\n\tstp\tx19,x20,[x0,#48]\n\tadc\tx24,x24,x24\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsqr_mont_382x\n\n.def\tsqr_mont_382x;\n.type\t32;\n.endef\n.p2align\t5\nsqr_mont_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#112\t\t// space for two 
384-bit vectors + word\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx17,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx5,x11,x17\t// t0 = a->re + a->im\n\tadcs\tx6,x12,x20\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tadcs\tx9,x15,x23\n\tadc\tx10,x16,x24\n\n\tsubs\tx19,x11,x17\t// t1 = a->re - a->im\n\tsbcs\tx20,x12,x20\n\tsbcs\tx21,x13,x21\n\tsbcs\tx22,x14,x22\n\tsbcs\tx23,x15,x23\n\tsbcs\tx24,x16,x24\n\tsbc\tx25,xzr,xzr\t\t// borrow flag as mask\n\n\tstp\tx5,x6,[sp]\n\tstp\tx7,x8,[sp,#16]\n\tstp\tx9,x10,[sp,#32]\n\tstp\tx19,x20,[sp,#48]\n\tstp\tx21,x22,[sp,#64]\n\tstp\tx23,x24,[sp,#80]\n\tstr\tx25,[sp,#96]\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx19,x11,x11\t// add with itself\n\tadcs\tx20,x12,x12\n\tadcs\tx21,x13,x13\n\tadcs\tx22,x14,x14\n\tadcs\tx23,x15,x15\n\tadc\tx24,x16,x16\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tldp\tx11,x12,[sp]\n\tldr\tx17,[sp,#48]\n\tldp\tx13,x14,[sp,#16]\n\tldp\tx15,x16,[sp,#32]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldr\tx25,[sp,#96]\t// account for sign from a->re - 
a->im\n\tldp\tx19,x20,[sp]\n\tldp\tx21,x22,[sp,#16]\n\tldp\tx23,x24,[sp,#32]\n\n\tand\tx19,x19,x25\n\tand\tx20,x20,x25\n\tand\tx21,x21,x25\n\tand\tx22,x22,x25\n\tand\tx23,x23,x25\n\tand\tx24,x24,x25\n\n\tsubs\tx11,x11,x19\n\tsbcs\tx12,x12,x20\n\tsbcs\tx13,x13,x21\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tand\tx21,x7,x25\n\tand\tx22,x8,x25\n\tand\tx23,x9,x25\n\tand\tx24,x10,x25\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#112\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.def\t__mul_mont_383_nonred;\n.type\t32;\n.endef\n.p2align\t5\n__mul_mont_383_nonred:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*1]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*2]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,x
zr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*3]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*4]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx
27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*5]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t\t// pull 
r_ptr\n\n\tadds\tx11,x20,x26\n\tadcs\tx12,x21,x27\n\tadcs\tx13,x22,x28\n\tadcs\tx14,x23,x0\n\tadcs\tx15,x24,x1\n\tadcs\tx16,x25,x3\n\n\tret\n\n\n.globl\tsgn0_pty_mont_384\n\n.def\tsgn0_pty_mont_384;\n.type\t32;\n.endef\n.p2align\t5\nsgn0_pty_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\tsgn0_pty_mont_384x\n\n.def\tsgn0_pty_mont_384x;\n.type\t32;\n.endef\n.p2align\t5\nsgn0_pty_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tadd\tx1,x1,#48\n\n\tand\tx2,x11,#1\n\torr\tx3,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12
,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\torr\tx3,x3,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx2,x2,x17\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\torr\tx1,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\torr\tx1,x1,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t\t// pack sign and parity\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n"
  },
  {
    "path": "build/coff/mulq_mont_256-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tmul_mont_sparse_256\n\n.def\tmul_mont_sparse_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmul_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_mont_sparse_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_mul_mont_sparse_256:\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r13\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%rbp\n\tmovq\t%rdx,%rbx\n\n\tmovq\t%rax,%r15\n\tmulq\t%r13\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mul_mont_sparse_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_mont_sparse_256:\n\n.globl\tsqr_mont_sparse_256\n\n.def\tsqr_mont_sparse_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_mont_sparse_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_sqr_mont_sparse_256:\n\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t%rcx,%r8\n\tmovq\t8(%rsi),%r14\n\tmovq\t%rdx,%rcx\n\tmovq\t16(%rsi),%r12\n\tleaq\t(%rsi),%rbx\n\tmovq\t24(%rsi),%rbp\n\n\tmovq\t%rax,%r15\n\tmulq\t%rax\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqr_mont_sparse_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_mont_sparse_256:\n.def\t__mulq_mont_sparse_256;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__mulq_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulq\t%r14\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t%r12\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\txorq\t%r14,%r14\n\tmovq\t%rdx,%r13\n\n\tmovq\t%r9,%rdi\n\timulq\t%r8,%r9\n\n\n\tmovq\t%rax,%r15\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\txorq\t%r15,%r15\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax
,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tmovq\t%r10,%rdi\n\timulq\t%r8,%r10\n\n\n\tmovq\t%rax,%r9\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\txorq\t%r9,%r9\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%r9\n\tmovq\t%r11,%rdi\n\timulq\t%r8,%r11\n\n\n\tmovq\t%rax,%r10\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r9\n\txorq\t%r10,%r10\n\n\n\tmulq\t0(%rcx)\n\t
addq\t%rax,%rdi\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r15\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\timulq\t%r8,%rax\n\tmovq\t8(%rsp),%rsi\n\n\n\tmovq\t%rax,%r11\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\tmovq\t%r14,%rbx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\n\n\n\tmovq\t%r15,%r12\n\tsubq\t0(%rcx),%r13\n\tsbbq\t8(%rcx),%r14\n\tsbbq\t16(%rcx),%r15\n\tmovq\t%r9,%rbp\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t$0,%r10\n\n\tcmovcq\t%rax,%r13\n\tcmovcq\t%rbx,%r14\n\tcmovcq\t%r12,%r15\n\tmovq\t%r13,0(%rsi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r14,8(%rsi)\n\tmovq\t%r15,16(%rsi)\n\tmovq\t%r9,24(%rsi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tfrom_mont_256\n\n.def\tfrom_mont_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nfrom_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_from_mont_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tfrom_mont_256$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_from_mont_256:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r14,%r10\n\tmovq\t%r15,%r11\n\tmovq\t%r9,%r12\n\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tsbbq\t24(%rbx),%r9\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_from_mont_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_from_mont_256:\n\n.globl\tredc_mont_256\n\n.def\tredc_mont_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nredc_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_redc_mont_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tredc_mont_256$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_redc_mont_256:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\taddq\t32(%rsi),%r13\n\tadcq\t40(%rsi),%r14\n\tmovq\t%r13,%rax\n\tadcq\t48(%rsi),%r15\n\tmovq\t%r14,%r10\n\tadcq\t56(%rsi),%r9\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r15,%r11\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tmovq\t%r9,%r12\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_redc_mont_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_redc_mont_256:\n.def\t__mulq_by_1_mont_256;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulq_by_1_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r10\n\tmovq\t16(%rsi),%r11\n\tmovq\t24(%rsi),%r12\n\n\tmovq\t%rax,%r13\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r13\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t16(%rbx)\n\tmovq\t%r10,%r14\n\timulq\t%rcx,%r10\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r9\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\ta
dcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_mul_mont_sparse_256\n.rva\t.LSEH_body_mul_mont_sparse_256\n.rva\t.LSEH_info_mul_mont_sparse_256_prologue\n\n.rva\t.LSEH_body_mul_mont_sparse_256\n.rva\t.LSEH_epilogue_mul_mont_sparse_256\n.rva\t.LSEH_info_mul_mont_sparse_256_body\n\n.rva\t.LSEH_epilogue_mul_mont_sparse_256\n.rva\t.LSEH_end_mul_mont_sparse_256\n.rva\t.LSEH_info_mul_mont_sparse_256_epilogue\n\n.rva\t.LSEH_begin_sqr_mont_sparse_256\n.rva\t.LSEH_body_sqr_mont_sparse_256\n.rva\t.LSEH_info_sqr_mont_sparse_256_prologue\n\n.rva\t.LSEH_body_sqr_mont_sparse_256\n.rva\t.LSEH_epilogue_sqr_mont_sparse_256\n.rva\t.LSEH_info_sqr_mont_sparse_256_body\n\n.rva\t.LSEH_epilogue_sqr_mont_sparse_256\n.rva\t.LSEH_end_sqr_mont_sparse_256\n.rva\t.LSEH_info_sqr_mont_sparse_256_epilogue\n\n.rva\t.LSEH_begin_from_mont_256\n.rva\t.LSEH_body_from_mont_256\n.rva\t.LSEH_info_from_mont_256_prologue\n\n.rva\t.LSEH_body_from_mont_256\n.rva\t.LSEH_epilogue_from_mont_256\n.rva\t.LSEH_info_from_mont_256_body\n\n.rva\t.LSEH_epilogue_from_mont_256\n.rva\t.LSEH_end_from_mont_256\n.rva\t.LSEH_info_from_mont_256_epilogue\n\n.rva\t.LSEH_begin_redc_mont_256\n.rva\t.LSEH_body_redc_mont_256\n.rva\t.LSEH_info_redc_mont_256_prologue\n\n.rva\t.LSEH_body_redc_mont_256\n.rva\t.LSEH_epilogue_redc_mont_256\n.rva\t.LSEH_info_redc_mont_256_body\n\n.rva\t.LSEH_epilogue_redc_mont_256\n.rva\t.LSEH_end_redc_mont_256\n.rva\t.LSEH_info_redc_mont_256_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_mul_mont_sparse_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_mont_sparse_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x
00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_mont_sparse_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_mont_sparse_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_mont_sparse_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_mont_sparse_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_from_mont_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_from_mont_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_from_mont_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_redc_mont_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_redc_mont_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t
0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_redc_mont_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/mulq_mont_384-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n\n\n\n\n\n\n.def\t__subq_mod_384x384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__subq_mod_384x384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__addq_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__addq_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__subq_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__subq_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subq_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tmul_mont_384x\n\n.def\tmul_mont_384x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmul_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_384x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$328,%rsp\n\n.LSEH_body_mul_mont_384x:\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t48(%rsi),%rsi\n\tleaq\t40+96(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rsi),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__addq_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulq_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tcall\t__subq_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subq_mod_384x384\n\n\tmovq\t%rcx,%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_mul_mont_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_mont_384x:\n.globl\tsqr_mont_384x\n\n.def\tsqr_mont_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqr_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqr_mont_384x:\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tcall\t__mulq_mont_384\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tmovq\t%r14,%r12\n\tadcq\t%r9,%r9\n\tmovq\t%r15,%r13\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%rax\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r10,%rbp\n\tsbbq\t16(%rcx),%r8\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r12,%r14\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rax,%r8\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%rbx,%r9\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,64(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n
.LSEH_epilogue_sqr_mont_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_mont_384x:\n\n.globl\tmul_382x\n\n.def\tmul_382x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmul_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_382x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_mul_382x:\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r10\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r12\n\tmovq\t40(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulq_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t16(%rs
p),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subq_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_mul_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_382x:\n.globl\tsqr_382x\n\n.def\tsqr_382x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_382x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rsi\n\n.LSEH_body_sqr_382x:\n\n\n\tmovq\t%rdx,%rcx\n\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subq_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulq
_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%rbp,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqr_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_382x:\n.globl\tmul_384\n\n.def\tmul_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmul_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n.LSEH_body_mul_384:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_384\n\n\tmovq\t0(%rsp),%r12\n\n\tmovq\t8(%rsp),%rbx\n\n\tmovq\t16(%rsp),%rbp\n\n\tleaq\t24(%rsp),%rsp\n\n.LSEH_epilogue_mul_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_384:\n\n.def\t__mulq_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulq_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rax\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,16(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rb
p,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,24(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,32(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,40(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq
\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rax,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rcx,48(%rdi)\n\tmovq\t%r8,56(%rdi)\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r10,72(%rdi)\n\tmovq\t%r11,80(%rdi)\n\tmovq\t%r12,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tsqr_384\n\n.def\tsqr_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sqr_384:\n\n\n\tcall\t__sqrq_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqr_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_384:\n\n.def\t__sqrq_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__sqrq_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rcx\n\tmovq\t24(%rsi),%rbx\n\n\n\tmovq\t%rax,%r14\n\tmulq\t%r15\n\tmovq\t%rax,%r9\n\tmovq\t%r14,%rax\n\tmovq\t32(%rsi),%rbp\n\tmovq\t%rdx,%r10\n\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t40(%rsi),%rsi\n\tmovq\t%rdx,%r11\n\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t%rax\n\txorq\t%r8,%r8\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r15,%rax\n\taddq\t%r9,%r9\n\tadcq\t$0,%r8\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r8\n\tmovq\t%r9,8(%rdi)\n\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t%rax\n\txorq\t%r9,%r9\n\taddq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\taddq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%r9\n\taddq\t%r8,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r9\n\tmovq\t%r10,16(%rdi)\n\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rbp\n\taddq\t%rax,%r14\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\n\tmulq\t%rax\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%rbx,%rax\n\taddq\t%r12,%r12\n\tadcq\t%r13,%r13\n\tadcq\t$0,%r11\n\taddq\t%r9,%r12\n\tadcq\t%r
dx,%r13\n\tadcq\t$0,%r11\n\tmovq\t%r12,32(%rdi)\n\n\n\tmulq\t%rbp\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\n\tmulq\t%rax\n\txorq\t%r12,%r12\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t$0,%r12\n\taddq\t%r11,%r14\n\tadcq\t%rdx,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t$0,%r12\n\tmovq\t%r15,56(%rdi)\n\n\n\tmulq\t%rsi\n\taddq\t%rax,%rbx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t%rax\n\txorq\t%r13,%r13\n\taddq\t%rax,%r12\n\tmovq\t%rsi,%rax\n\taddq\t%rcx,%rcx\n\tadcq\t%rbx,%rbx\n\tadcq\t$0,%r13\n\taddq\t%r12,%rcx\n\tadcq\t%rdx,%rbx\n\tmovq\t%rcx,64(%rdi)\n\tadcq\t$0,%r13\n\tmovq\t%rbx,72(%rdi)\n\n\n\tmulq\t%rax\n\taddq\t%r13,%rax\n\taddq\t%rbp,%rbp\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rax,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsqr_mont_384\n\n.def\tsqr_mont_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$120,%rsp\n\n.LSEH_body_sqr_mont_384:\n\n\n\tmovq\t%rcx,96(%rsp)\n\tmovq\t%rdx,104(%rsp)\n\tmovq\t%rdi,112(%rsp)\n\n\tmovq\t%rsp,%rdi\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rsp),%rsi\n\tmovq\t96(%rsp),%rcx\n\tmovq\t104(%rsp),%rbx\n\tmovq\t112(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t120(%rsp),%r8\n\tmovq\t120(%rsp),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqr_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_mont_384:\n\n\n\n.globl\tredc_mont_384\n\n.def\tredc_mont_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nredc_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_redc_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tredc_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_redc_mont_384:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_redc_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_redc_mont_384:\n\n\n\n\n.globl\tfrom_mont_384\n\n.def\tfrom_mont_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nfrom_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_from_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tfrom_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_from_mont_384:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\n\n\n\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_from_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_from_mont_384:\n.def\t__mulq_by_1_mont_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulq_by_1_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,%r15\n\timulq\t%rcx,%r9\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,%r8\n\timulq\t%rcx,%r10\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16
(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,%r9\n\timulq\t%rcx,%r11\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq
\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__redq_tail_mont_384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__redq_tail_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsgn0_pty_mont_384\n\n.def\tsgn0_pty_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsgn0_pty_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0_pty_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sgn0_pty_mont_384:\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\txorq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sgn0_pty_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0_pty_mont_384:\n\n.globl\tsgn0_pty_mont_384x\n\n.def\tsgn0_pty_mont_384x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsgn0_pty_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0_pty_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sgn0_pty_mont_384x:\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sgn0_pty_mont_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0_pty_mont_384x:\n.globl\tmul_mont_384\n\n.def\tmul_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmul_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mul_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$24,%rsp\n\n.LSEH_body_mul_mont_384:\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\tmovq\t%rdx,%rbx\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\n\tcall\t__mulq_mont_384\n\n\tmovq\t24(%rsp),%r15\n\n\tmovq\t32(%rsp),%r14\n\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t48(%rsp),%r12\n\n\tmovq\t56(%rsp),%rbx\n\n\tmovq\t64(%rsp),%rbp\n\n\tleaq\t72(%rsp),%rsp\n\n.LSEH_epilogue_mul_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mul_mont_384:\n.def\t__mulq_mont_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulq_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rdi\n\tmulq\t%r14\n\tmovq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%rbp\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\txorq\t%r15,%r15\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\tadcq\t$0,%r15\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r9,%rbp\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadc
q\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\txorq\t%r8,%r8\n\taddq\t%rax,%r14\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r10,%rbp\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\txorq\t%r9,%r9\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%
rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r11,%rbp\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\txorq\t%r10,%r10\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t%rdx,%r9\n\
tadcq\t$0,%r10\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r12,%rbp\n\timulq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmovq\t%r13,%rbp\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\tad
dq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rsi)\n\taddq\t%r12,%r10\n\tadcq\t$0,%rdx\n\txorq\t%r12,%r12\n\taddq\t%rax,%r10\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t16(%rsp),%rdi\n\tsubq\t0(%rcx),%r14\n\tmovq\t%r15,%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r8,%rbx\n\tsbbq\t16(%rcx),%r8\n\tmovq\t%r9,%rsi\n\tsbbq\t24(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rdx,%r15\n\tcmovcq\t%rbx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%r13,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tsqr_n_mul_mont_384\n\n.def\tsqr_n_mul_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqr_n_mul_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_n_mul_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n\tmovq\t48(%rsp),%r9\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_384$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqr_n_mul_mont_384:\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\n.Loop_sqr_384:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovd\t%xmm1,%edx\n\tleaq\t0(%rdi),%rsi\n\tdecl\t%edx\n\tjnz\t.Loop_sqr_384\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqr_n_mul_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_n_mul_mont_384:\n\n.globl\tsqr_n_mul_mont_383\n\n.def\tsqr_n_mul_mont_383;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_n_mul_mont_383:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_n_mul_mont_383:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n\tmovq\t48(%rsp),%r9\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_383$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqr_n_mul_mont_383:\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\n.Loop_sqr_383:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovd\t%xmm1,%edx\n\taddq\t48(%rsi),%r14\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tleaq\t0(%rdi),%rsi\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tdecl\t%edx\n\tjnz\t.Loop_sqr_383\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqr_n_mul_mont_383:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_n_mul_mont_383:\n.def\t__mulq_mont_383_nonred;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulq_mont_383_nonred:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rbp\n\tmulq\t%r14\n\tmovq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%r15\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rcx)\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t%rdx,%r14\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r1
5,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rsi)\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rcx)\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\
t24(%rcx)\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t%rdx,%r8\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r11,%r10\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rcx)\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t%rdx,%r9\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq
\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rcx)\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t%rdx,%r10\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r13,%r12\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r1
1,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r10\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t24(%rcx)\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r10\n\tadcq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tsqr_mont_382x\n\n.def\tsqr_mont_382x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqr_mont_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqr_mont_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_382x$1\n#endif\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqr_mont_382x:\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rdi,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t24(%rsp),%rdi\n\tcall\t__mulq_mont_383_nonred\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%r8,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_383_nonred\n\tmovq\t32+96(%rsp),%rsi\n\tmovq\t32+0(%rsp),%r12\n\tmovq\t32+8(%rsp),%r13\n\tandq\t%rsi,%r12\n\tmovq\t32+16(%rsp),%rax\
n\tandq\t%rsi,%r13\n\tmovq\t32+24(%rsp),%rbx\n\tandq\t%rsi,%rax\n\tmovq\t32+32(%rsp),%rbp\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t32+40(%rsp),%rsi\n\n\tsubq\t%r12,%r14\n\tmovq\t0(%rcx),%r12\n\tsbbq\t%r13,%r15\n\tmovq\t8(%rcx),%r13\n\tsbbq\t%rax,%r8\n\tmovq\t16(%rcx),%rax\n\tsbbq\t%rbx,%r9\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t%rbp,%r10\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t%rsi,%r11\n\tsbbq\t%rsi,%rsi\n\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r12,%r14\n\tadcq\t%r13,%r15\n\tadcq\t%rax,%r8\n\tadcq\t%rbx,%r9\n\tadcq\t%rbp,%r10\n\tadcq\t%rsi,%r11\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqr_mont_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqr_mont_382x:\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_mul_mont_384x\n.rva\t.LSEH_body_mul_mont_384x\n.rva\t.LSEH_info_mul_mont_384x_prologue\n\n.rva\t.LSEH_body_mul_mont_384x\n.rva\t.LSEH_epilogue_mul_mont_384x\n.rva\t.LSEH_info_mul_mont_384x_body\n\n.rva\t.LSEH_epilogue_mul_mont_384x\n.rva\t.LSEH_end_mul_mont_384x\n.rva\t.LSEH_info_mul_mont_384x_epilogue\n\n.rva\t.LSEH_begin_sqr_mont_384x\n.rva\t.LSEH_body_sqr_mont_384x\n.rva\t.LSEH_info_sqr_mont_384x_prologue\n\n.rva\t.LSEH_body_sqr_mont_384x\n.rva\t.LSEH_epilogue_sqr_mont_384x\n.rva\t.LSEH_info_sqr_mont_384x_body\n\n.rva\t.LSEH_epilogue_sqr_mont_384x\n.rva\t.LSEH_end_sqr_mont_384x\n.rva\t.LSEH_info_sqr_mont_384x_epilogue\n\n.rva\t.LSEH_begin_mul_382x\n.rva\t.LSEH_body_mul_382x\n.rva\t.LSEH_info_mul_382x_prologue\n\n.rva\t.LSEH_body_mul_382x\n.rva\t.LSEH_epilogu
e_mul_382x\n.rva\t.LSEH_info_mul_382x_body\n\n.rva\t.LSEH_epilogue_mul_382x\n.rva\t.LSEH_end_mul_382x\n.rva\t.LSEH_info_mul_382x_epilogue\n\n.rva\t.LSEH_begin_sqr_382x\n.rva\t.LSEH_body_sqr_382x\n.rva\t.LSEH_info_sqr_382x_prologue\n\n.rva\t.LSEH_body_sqr_382x\n.rva\t.LSEH_epilogue_sqr_382x\n.rva\t.LSEH_info_sqr_382x_body\n\n.rva\t.LSEH_epilogue_sqr_382x\n.rva\t.LSEH_end_sqr_382x\n.rva\t.LSEH_info_sqr_382x_epilogue\n\n.rva\t.LSEH_begin_mul_384\n.rva\t.LSEH_body_mul_384\n.rva\t.LSEH_info_mul_384_prologue\n\n.rva\t.LSEH_body_mul_384\n.rva\t.LSEH_epilogue_mul_384\n.rva\t.LSEH_info_mul_384_body\n\n.rva\t.LSEH_epilogue_mul_384\n.rva\t.LSEH_end_mul_384\n.rva\t.LSEH_info_mul_384_epilogue\n\n.rva\t.LSEH_begin_sqr_384\n.rva\t.LSEH_body_sqr_384\n.rva\t.LSEH_info_sqr_384_prologue\n\n.rva\t.LSEH_body_sqr_384\n.rva\t.LSEH_epilogue_sqr_384\n.rva\t.LSEH_info_sqr_384_body\n\n.rva\t.LSEH_epilogue_sqr_384\n.rva\t.LSEH_end_sqr_384\n.rva\t.LSEH_info_sqr_384_epilogue\n\n.rva\t.LSEH_begin_sqr_mont_384\n.rva\t.LSEH_body_sqr_mont_384\n.rva\t.LSEH_info_sqr_mont_384_prologue\n\n.rva\t.LSEH_body_sqr_mont_384\n.rva\t.LSEH_epilogue_sqr_mont_384\n.rva\t.LSEH_info_sqr_mont_384_body\n\n.rva\t.LSEH_epilogue_sqr_mont_384\n.rva\t.LSEH_end_sqr_mont_384\n.rva\t.LSEH_info_sqr_mont_384_epilogue\n\n.rva\t.LSEH_begin_redc_mont_384\n.rva\t.LSEH_body_redc_mont_384\n.rva\t.LSEH_info_redc_mont_384_prologue\n\n.rva\t.LSEH_body_redc_mont_384\n.rva\t.LSEH_epilogue_redc_mont_384\n.rva\t.LSEH_info_redc_mont_384_body\n\n.rva\t.LSEH_epilogue_redc_mont_384\n.rva\t.LSEH_end_redc_mont_384\n.rva\t.LSEH_info_redc_mont_384_epilogue\n\n.rva\t.LSEH_begin_from_mont_384\n.rva\t.LSEH_body_from_mont_384\n.rva\t.LSEH_info_from_mont_384_prologue\n\n.rva\t.LSEH_body_from_mont_384\n.rva\t.LSEH_epilogue_from_mont_384\n.rva\t.LSEH_info_from_mont_384_body\n\n.rva\t.LSEH_epilogue_from_mont_384\n.rva\t.LSEH_end_from_mont_384\n.rva\t.LSEH_info_from_mont_384_epilogue\n\n.rva\t.LSEH_begin_sgn0_pty_mont_384\n.rva\t.LSEH_body_sgn0_pty_mont_384
\n.rva\t.LSEH_info_sgn0_pty_mont_384_prologue\n\n.rva\t.LSEH_body_sgn0_pty_mont_384\n.rva\t.LSEH_epilogue_sgn0_pty_mont_384\n.rva\t.LSEH_info_sgn0_pty_mont_384_body\n\n.rva\t.LSEH_epilogue_sgn0_pty_mont_384\n.rva\t.LSEH_end_sgn0_pty_mont_384\n.rva\t.LSEH_info_sgn0_pty_mont_384_epilogue\n\n.rva\t.LSEH_begin_sgn0_pty_mont_384x\n.rva\t.LSEH_body_sgn0_pty_mont_384x\n.rva\t.LSEH_info_sgn0_pty_mont_384x_prologue\n\n.rva\t.LSEH_body_sgn0_pty_mont_384x\n.rva\t.LSEH_epilogue_sgn0_pty_mont_384x\n.rva\t.LSEH_info_sgn0_pty_mont_384x_body\n\n.rva\t.LSEH_epilogue_sgn0_pty_mont_384x\n.rva\t.LSEH_end_sgn0_pty_mont_384x\n.rva\t.LSEH_info_sgn0_pty_mont_384x_epilogue\n\n.rva\t.LSEH_begin_mul_mont_384\n.rva\t.LSEH_body_mul_mont_384\n.rva\t.LSEH_info_mul_mont_384_prologue\n\n.rva\t.LSEH_body_mul_mont_384\n.rva\t.LSEH_epilogue_mul_mont_384\n.rva\t.LSEH_info_mul_mont_384_body\n\n.rva\t.LSEH_epilogue_mul_mont_384\n.rva\t.LSEH_end_mul_mont_384\n.rva\t.LSEH_info_mul_mont_384_epilogue\n\n.rva\t.LSEH_begin_sqr_n_mul_mont_384\n.rva\t.LSEH_body_sqr_n_mul_mont_384\n.rva\t.LSEH_info_sqr_n_mul_mont_384_prologue\n\n.rva\t.LSEH_body_sqr_n_mul_mont_384\n.rva\t.LSEH_epilogue_sqr_n_mul_mont_384\n.rva\t.LSEH_info_sqr_n_mul_mont_384_body\n\n.rva\t.LSEH_epilogue_sqr_n_mul_mont_384\n.rva\t.LSEH_end_sqr_n_mul_mont_384\n.rva\t.LSEH_info_sqr_n_mul_mont_384_epilogue\n\n.rva\t.LSEH_begin_sqr_n_mul_mont_383\n.rva\t.LSEH_body_sqr_n_mul_mont_383\n.rva\t.LSEH_info_sqr_n_mul_mont_383_prologue\n\n.rva\t.LSEH_body_sqr_n_mul_mont_383\n.rva\t.LSEH_epilogue_sqr_n_mul_mont_383\n.rva\t.LSEH_info_sqr_n_mul_mont_383_body\n\n.rva\t.LSEH_epilogue_sqr_n_mul_mont_383\n.rva\t.LSEH_end_sqr_n_mul_mont_383\n.rva\t.LSEH_info_sqr_n_mul_mont_383_epilogue\n\n.rva\t.LSEH_begin_sqr_mont_382x\n.rva\t.LSEH_body_sqr_mont_382x\n.rva\t.LSEH_info_sqr_mont_382x_prologue\n\n.rva\t.LSEH_body_sqr_mont_382x\n.rva\t.LSEH_epilogue_sqr_mont_382x\n.rva\t.LSEH_info_sqr_mont_382x_body\n\n.rva\t.LSEH_epilogue_sqr_mont_382x\n.rva\t.LSEH_end_sqr_mont_382x\n.r
va\t.LSEH_info_sqr_mont_382x_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_mul_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_mont_384x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x29,0x00\n.byte\t0x00,0xe4,0x2a,0x00\n.byte\t0x00,0xd4,0x2b,0x00\n.byte\t0x00,0xc4,0x2c,0x00\n.byte\t0x00,0x34,0x2d,0x00\n.byte\t0x00,0x54,0x2e,0x00\n.byte\t0x00,0x74,0x30,0x00\n.byte\t0x00,0x64,0x31,0x00\n.byte\t0x00,0x01,0x2f,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_mont_384x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_382x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x7
4,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_382x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_384_body:\n.byte\t1,0,11,0\n.byte\t0x00,0xc4,0x00,0x00\n.byte\t0x00,0x34,0x01,0x00\n.byte\t0x00,0x54,0x02,0x00\n.byte\t0x00,0x74,0x04,0x00\n.byte\t0x00,0x64,0x05,0x00\n.byte\t0x00,0x22\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.LSEH_info_mul_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_mont_384_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x0f,0x00\n.byte\t0x00,0xe4,0x10,0x00\n.byte\t0x00,0xd4,0x11,0x00\n.byte\t0x00,0xc4,0x12,0x00\n.byte\t0x00,0x34,0x13,0x00\n.byte\t0x00,0x54,0x14,0x0
0\n.byte\t0x00,0x74,0x16,0x00\n.byte\t0x00,0x64,0x17,0x00\n.byte\t0x00,0x01,0x15,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_redc_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_redc_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_redc_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_from_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_from_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_from_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0_pty_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0_pty_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x0
0\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0_pty_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0_pty_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0_pty_mont_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0_pty_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mul_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mul_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x03,0x00\n.byte\t0x00,0xe4,0x04,0x00\n.byte\t0x00,0xd4,0x05,0x00\n.byte\t0x00,0xc4,0x06,0x00\n.byte\t0x00,0x34,0x07,0x00\n.byte\t0x00,0x54,0x08,0x00\n.byte\t0x00,0x74,0x0a,0x00\n.byte\t0x00,0x64,0x0b,0x00\n.byte\t0x00,0x82\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mul_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_n_mul_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_n_mul_mont_384_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_n_mul_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte
\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_n_mul_mont_383_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_n_mul_mont_383_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_n_mul_mont_383_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqr_mont_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqr_mont_382x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqr_mont_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/mulx_mont_256-x86_64.s",
    "content": ".text\t\n\n.globl\tmulx_mont_sparse_256\n\n.def\tmulx_mont_sparse_256;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmulx_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mulx_mont_sparse_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\nmul_mont_sparse_256$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_mulx_mont_sparse_256:\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_mulx_mont_sparse_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mulx_mont_sparse_256:\n\n.globl\tsqrx_mont_sparse_256\n\n.def\tsqrx_mont_sparse_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_mont_sparse_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nsqr_mont_sparse_256$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sqrx_mont_sparse_256:\n\n\n\tmovq\t%rsi,%rbx\n\tmovq\t%rcx,%r8\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_mont_sparse_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_mont_sparse_256:\n.def\t__mulx_mont_sparse_256;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_mont_sparse_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulxq\t%r15,%r15,%r12\n\tmulxq\t%rbp,%rbp,%r13\n\taddq\t%r15,%r11\n\tmulxq\t%r9,%r9,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rbp,%r12\n\tadcq\t%r9,%r13\n\tadcq\t$0,%r14\n\n\tmovq\t%rax,%r10\n\timulq\t%r8,%rax\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r11\n\tadcxq\t%r9,%r12\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r15,%r9\n\tadoxq\t%r9,%r15\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r12\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r9,%r13\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r10,%r14\n\tadoxq\t%r10,%r15\n\tadcxq\t%r10,%r15\n\tadoxq\t%r10,%r10\n\tadcq\t$0,%r10\n\tmovq\t%rax,%r11\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r10,%r9\n\tadoxq\t%r9,%r10\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r11\n\tadoxq\t%r12,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r13\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\tadcxq\t%r11,%r15\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\tadcq\t$0,%r11\n\tmovq\t%rax,%r12\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t8+1
28(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r10\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r10\n\tadcxq\t%r11,%r9\n\tadoxq\t%r9,%r11\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r13,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r14\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\tadoxq\t%r12,%r12\n\tadcq\t$0,%r12\n\timulq\t%r8,%rdx\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rcx),%r13,%r9\n\tadcxq\t%rax,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%r14,%rdx\n\tleaq\t128(%rcx),%rcx\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r9,%r11\n\tmovq\t%r15,%rax\n\tadcxq\t%r13,%r11\n\tadoxq\t%r13,%r12\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t%r10,%rbp\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%r9\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rdx,%r14\n\tcmovcq\t%rax,%r15\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%r9,%r11\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tfromx_mont_256\n\n.def\tfromx_mont_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nfromx_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_fromx_mont_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nfrom_mont_256$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_fromx_mont_256:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r15,%rdx\n\tmovq\t%r10,%r12\n\tmovq\t%r11,%r13\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tsbbq\t24(%rbx),%r11\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_fromx_mont_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_fromx_mont_256:\n\n.globl\tredcx_mont_256\n\n.def\tredcx_mont_256;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nredcx_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_redcx_mont_256:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nredc_mont_256$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_redcx_mont_256:\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\taddq\t32(%rsi),%r14\n\tadcq\t40(%rsi),%r15\n\tmovq\t%r14,%rax\n\tadcq\t48(%rsi),%r10\n\tmovq\t%r15,%rdx\n\tadcq\t56(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r10,%r12\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t24(%rbx),%r11\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_redcx_mont_256:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_redcx_mont_256:\n.def\t__mulx_by_1_mont_256;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_by_1_mont_256:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r11\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\
tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_mulx_mont_sparse_256\n.rva\t.LSEH_body_mulx_mont_sparse_256\n.rva\t.LSEH_info_mulx_mont_sparse_256_prologue\n\n.rva\t.LSEH_body_mulx_mont_sparse_256\n.rva\t.LSEH_epilogue_mulx_mont_sparse_256\n.rva\t.LSEH_info_mulx_mont_sparse_256_body\n\n.rva\t.LSEH_epilogue_mulx_mont_sparse_256\n.rva\t.LSEH_end_mulx_mont_sparse_256\n.rva\t.LSEH_info_mulx_mont_sparse_256_epilogue\n\n.rva\t.LSEH_begin_sqrx_mont_sparse_256\n.rva\t.LSEH_body_sqrx_mont_sparse_256\n.rva\t.LSEH_info_sqrx_mont_sparse_256_prologue\n\n.rva\t.LSEH_body_sqrx_mont_sparse_256\n.rva\t.LSEH_epilogue_sqrx_mont_sparse_256\n.rva\t.LSEH_info_sqrx_mont_sparse_256_body\n\n.rva\t.LSEH_epilogue_sqrx_mont_sparse_256\n.rva\t.LSEH_end_sqrx_mont_sparse_256\n.rva\t.LSEH_info_sqrx_mont_sparse_256_epilogue\n\n.rva\t.LSEH_begin_fromx_mont_256\n.rva\t.LSEH_body_fromx_mont_256\n.rva\t.LSEH_info_fromx_mont_256_prologue\n\n.rva\t.LSEH_body_fromx_mont_256\n.rva\t.LSEH_epilogue_fromx_mont_256\n.rva\t.LSEH_info_fromx_mont_256_body\n\n.rva\t.LSEH_epilogue_fromx_mont_256\n.rva\t.LSEH_end_fromx_mont_256\n.rva\t.LSEH_info_fromx_mont_256_epilogue\n\n.rva\t.LSEH_begin_redcx_mont_256\n.rva\t.LSEH_body_redcx_mont_256\n.rva\t.LSEH_info_redcx_mont_256_prologue\n\n.rva\t.LSEH_body_redcx_mont_256\n.rva\t.LSEH_epilogue_redcx_mont_256\n.rva\t.LSEH_info_redcx_mont_256_body\n\n.rva\t.LSEH_epilogue_redcx_mont_256\n.rva\t.LSEH_end_redcx_mont_256\n.rva\t.LSEH_info_redcx_mont_256_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_mulx_mont_sparse_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byt
e\t0,0\n.long\t0,0\n.LSEH_info_mulx_mont_sparse_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mulx_mont_sparse_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_mont_sparse_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_mont_sparse_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_mont_sparse_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_fromx_mont_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_fromx_mont_256_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_fromx_mont_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_redcx_mont_256_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_redcx_mont_256_body:\n.byte\t1,0,17,0\n.byte\t0x0
0,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_redcx_mont_256_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/mulx_mont_384-x86_64.s",
    "content": ".text\t\n\n\n\n\n\n\n\n.def\t__subx_mod_384x384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__subx_mod_384x384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__addx_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__addx_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__subx_mod_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__subx_mod_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subx_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tmulx_mont_384x\n\n.def\tmulx_mont_384x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nmulx_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mulx_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\nmul_mont_384x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$328,%rsp\n\n.LSEH_body_mulx_mont_384x:\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t128+48(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t(%rbx),%rsi\n\tleaq\t-48(%rbx),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulx_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rcx),%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_mulx_mont_384x:\n\tmov\t8
(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mulx_mont_384x:\n.globl\tsqrx_mont_384x\n\n.def\tsqrx_mont_384x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqrx_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nsqr_mont_384x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqrx_mont_384x:\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tmovq\t%rdx,%r8\n\tadcq\t%r12,%r12\n\tmovq\t%r15,%r9\n\tadcq\t%rdi,%rdi\n\tmovq\t%rax,%r10\n\tadcq\t%rbp,%rbp\n\tmovq\t%r12,%r11\n\tsbbq\t%rsi,%rsi\n\n\tsubq\t0(%rcx),%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rdi,%r13\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r12\n\tsbbq\t32(%rcx),%rdi\n\tmovq\t%rbp,%r14\n\tsbbq\t40(%rcx),%rbp\n\tsbbq\t$0,%rsi\n\n\tcmovcq\t%r8,%rdx\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tmovq\t%rdx,48(%rbx)\n\tcmovcq\t%r11,%r12\n\tmovq\t%r15,56(%rbx)\n\tcmovcq\t%r13,%rdi\n\tmovq\t%rax,64(%rbx)\n\tcmovcq\t%r14,%rbp\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\
n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqrx_mont_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_mont_384x:\n\n.globl\tmulx_382x\n\n.def\tmulx_382x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmulx_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mulx_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nmul_382x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_mulx_382x:\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r10\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r12\n\tmovq\t40(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq
\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulx_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48+128(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t16(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_mulx_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mulx_382x:\n.globl\tsqrx_382x\n\n.def\tsqrx_382x;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\nsqr_382x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rsi\n\n.LSEH_body_sqrx_382x:\n\n\n\tmovq\t%rdx,%rcx\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subx_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%rbp,%rbp\n\tmovq\t%rbx,72(%rdi)\n\ta
dcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_382x:\n.globl\tmulx_384\n\n.def\tmulx_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmulx_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mulx_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\nmul_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n.LSEH_body_mulx_384:\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rsp),%r15\n\n\tmovq\t8(%rsp),%r14\n\n\tmovq\t16(%rsp),%r13\n\n\tmovq\t24(%rsp),%r12\n\n\tmovq\t32(%rsp),%rbx\n\n\tmovq\t40(%rsp),%rbp\n\n\tleaq\t48(%rsp),%rsp\n\n.LSEH_epilogue_mulx_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mulx_384:\n\n.def\t__mulx_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%r14,%r9,%rcx\n\txorq\t%rbp,%rbp\n\n\tmulxq\t%r15,%r8,%rax\n\tadcxq\t%rcx,%r8\n\tmovq\t%r9,0(%rdi)\n\n\tmulxq\t%r10,%r9,%rcx\n\tadcxq\t%rax,%r9\n\n\tmulxq\t%r11,%r10,%rax\n\tadcxq\t%rcx,%r10\n\n\tmulxq\t%r12,%r11,%rcx\n\tadcxq\t%rax,%r11\n\n\tmulxq\t%r13,%r12,%r13\n\tmovq\t8(%rbx),%rdx\n\tadcxq\t%rcx,%r12\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,8(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,16(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,24(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t32(%rbx),%
rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,32(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,40(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t%rax,%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tmovq\t%r10,64(%rdi)\n\tmovq\t%r11,72(%rdi)\n\tmovq\t%r12,80(%rdi)\n\tmovq\t%r13,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.globl\tsqrx_384\n\n.def\tsqrx_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\nsqr_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tpushq\t%rdi\n\n.LSEH_body_sqrx_384:\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__sqrx_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_384:\n.def\t__sqrx_384;\t.scl 3;\t.type 32;\t.endef\n.p2align\t5\n__sqrx_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r15\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rbx\n\n\n\tmulxq\t%r14,%r8,%rdi\n\tmovq\t40(%rsi),%rbp\n\tmulxq\t%r15,%r9,%rax\n\taddq\t%rdi,%r9\n\tmulxq\t%rcx,%r10,%rdi\n\tadcq\t%rax,%r10\n\tmulxq\t%rbx,%r11,%rax\n\tadcq\t%rdi,%r11\n\tmulxq\t%rbp,%r12,%r13\n\tmovq\t%r14,%rdx\n\tadcq\t%rax,%r12\n\tadcq\t$0,%r13\n\n\n\txorq\t%r14,%r14\n\tmulxq\t%r15,%rdi,%rax\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rax,%r11\n\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rax,%r12\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%r15,%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%r14,%rax\n\tadcxq\t%rax,%r14\n\n\n\txorq\t%r15,%r15\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rax,%r14\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%r15,%rax\n\tadcxq\t%rax,%r15\n\n\n\txorq\t%rcx,%rcx\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rax,%r15\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%
rbx,%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rcx,%rax\n\tadcxq\t%rax,%rcx\n\n\n\tmulxq\t%rbp,%rdi,%rbx\n\tmovq\t0(%rsi),%rdx\n\taddq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rbx\n\n\n\txorq\t%rbp,%rbp\n\tadcxq\t%r8,%r8\n\tadcxq\t%r9,%r9\n\tadcxq\t%r10,%r10\n\tadcxq\t%r11,%r11\n\tadcxq\t%r12,%r12\n\n\n\tmulxq\t%rdx,%rdx,%rax\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t8(%rsi),%rdx\n\tadoxq\t%rax,%r8\n\tmovq\t%r8,8(%rdi)\n\n\tmulxq\t%rdx,%r8,%rax\n\tmovq\t16(%rsi),%rdx\n\tadoxq\t%r8,%r9\n\tadoxq\t%rax,%r10\n\tmovq\t%r9,16(%rdi)\n\tmovq\t%r10,24(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t24(%rsi),%rdx\n\tadoxq\t%r8,%r11\n\tadoxq\t%r9,%r12\n\tadcxq\t%r13,%r13\n\tadcxq\t%r14,%r14\n\tmovq\t%r11,32(%rdi)\n\tmovq\t%r12,40(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t32(%rsi),%rdx\n\tadoxq\t%r8,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r15,%r15\n\tadcxq\t%rcx,%rcx\n\tmovq\t%r13,48(%rdi)\n\tmovq\t%r14,56(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t40(%rsi),%rdx\n\tadoxq\t%r8,%r15\n\tadoxq\t%r9,%rcx\n\tadcxq\t%rbx,%rbx\n\tadcxq\t%rbp,%rbp\n\tmovq\t%r15,64(%rdi)\n\tmovq\t%rcx,72(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tadoxq\t%r8,%rbx\n\tadoxq\t%r9,%rbp\n\n\tmovq\t%rbx,80(%rdi)\n\tmovq\t%rbp,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n\n\n.globl\tredcx_mont_384\n\n.def\tredcx_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nredcx_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_redcx_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nredc_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_redcx_mont_384:\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_redcx_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_redcx_mont_384:\n\n\n\n\n.globl\tfromx_mont_384\n\n.def\tfromx_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nfromx_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_fromx_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nfrom_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_fromx_mont_384:\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\n\n\n\tmovq\t%r14,%rax\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_fromx_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_fromx_mont_384:\n.def\t__mulx_by_1_mont_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_by_1_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rcx,%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\timulq\t%r8,%rdx\n\n\n\txorq\t%r14,%r14\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r13\n\tadoxq\t%r14,%rbp\n\tadcxq\t%rbp,%r14\n\timulq\t%r9,%rdx\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r14\n\tadoxq\t%r15,%rbp\n\tadcxq\t%rbp,%r15\n\timulq\t%r10,%rdx\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r15\n\tadoxq\t%r8,%rbp\n\tadcxq\t%rbp,%r8\n\timulq\t%r11,%rdx\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24(
%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r8\n\tadoxq\t%r9,%rbp\n\tadcxq\t%rbp,%r9\n\timulq\t%r12,%rdx\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r9\n\tadoxq\t%r10,%rbp\n\tadcxq\t%rbp,%r10\n\timulq\t%r13,%rdx\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r10\n\tadoxq\t%r11,%rbp\n\tadcxq\t%rbp,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.def\t__redx_tail_mont_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__redx_tail_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsgn0x_pty_mont_384\n\n.def\tsgn0x_pty_mont_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsgn0x_pty_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0x_pty_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\nsgn0_pty_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sgn0x_pty_mont_384:\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\txorq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12
\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sgn0x_pty_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0x_pty_mont_384:\n\n.globl\tsgn0x_pty_mont_384x\n\n.def\tsgn0x_pty_mont_384x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsgn0x_pty_mont_384x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sgn0x_pty_mont_384x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\nsgn0_pty_mont_384x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$8,%rsp\n\n.LSEH_body_sgn0x_pty_mont_384x:\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,
%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n\n\tmovq\t16(%rsp),%r14\n\n\tmovq\t24(%rsp),%r13\n\n\tmovq\t32(%rsp),%r12\n\n\tmovq\t40(%rsp),%rbx\n\n\tmovq\t48(%rsp),%rbp\n\n\tleaq\t56(%rsp),%rsp\n\n.LSEH_epilogue_sgn0x_pty_mont_384x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sgn0x_pty_mont_384x:\n.globl\tmulx_mont_384\n\n.def\tmulx_mont_384;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nmulx_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_mulx_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\nmul_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tleaq\t-24(%rsp),%rsp\n\n.LSEH_body_mulx_mont_384:\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\tmovq\t%r8,(%rsp)\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n\n\tmovq\t32(%rsp),%r14\n\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t48(%rsp),%r12\n\n\tmovq\t56(%rsp),%rbx\n\n\tmovq\t64(%rsp),%rbp\n\n\tleaq\t72(%rsp),%rsp\n\n.LSEH_epilogue_mulx_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_mulx_mont_384:\n.def\t__mulx_mont_384;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\txorq\t%r15,%r15\n\n\tmovq\t%r8,16(%rsp)\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\tadoxq\t%rax,%r15\n\tadoxq\t%rax,%rax\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r8,%r14\n\tadoxq\t%r8,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r9,16(%rsp)\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rd
x\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r8,%rax\n\tadoxq\t%r8,%r8\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r15\n\tadoxq\t%r9,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r10,16(%rsp)\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\tadoxq\t%r9,%r8\n\tadoxq\t%r9,%r9\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r10,%rax\n\tadoxq\t%r10,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r11,16(%rsp)\n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\
n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\tadoxq\t%r10,%r9\n\tadoxq\t%r10,%r10\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r11,%r8\n\tadoxq\t%r11,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tmovq\t%r12,16(%rsp)\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r
13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r12,%r9\n\tadoxq\t%r12,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tmovq\t%r15,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tmovq\t%rax,%rsi\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tmovq\t%r14,%rdx\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tleaq\t128(%rcx),%rcx\n\tmovq\t%r8,%r12\n\tadcq\t$0,%r11\n\n\n\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r9,%rdi\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r8\n\tsbbq\t32(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t40(%rcx),%r10\n\tsbbq\t$0,%r11\n\n\tcmovncq\t%r14,%rdx\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rsi,%rax\n\tcmovncq\t%r8,%r12\n\tmovq\t%rdx,0(%rbx)\n\tcmovncq\t%r9,%rdi\n\tmovq\t%r15,8(%rbx)\n\tcmovncq\t%r10,%rbp\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsqrx_mont_384\n\n.def\tsqrx_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nsqr_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tleaq\t-24(%rsp),%rsp\n\n.LSEH_body_sqrx_mont_384:\n\n\n\tmovq\t%rcx,%r8\n\tleaq\t-128(%rdx),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tleaq\t(%rsi),%rbx\n\tmovq\t%r8,(%rsp)\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n\n\tmovq\t32(%rsp),%r14\n\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t48(%rsp),%r12\n\n\tmovq\t56(%rsp),%rbx\n\n\tmovq\t64(%rsp),%rbp\n\n\tleaq\t72(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_mont_384:\n\n.globl\tsqrx_n_mul_mont_384\n\n.def\tsqrx_n_mul_mont_384;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_n_mul_mont_384:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_n_mul_mont_384:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n\tmovq\t48(%rsp),%r9\nsqr_n_mul_mont_384$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tleaq\t-40(%rsp),%rsp\n\n.LSEH_body_sqrx_n_mul_mont_384:\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\n.Loop_sqrx_384:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\t.Loop_sqrx_384\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t40(%rsp),%r15\n\n\tmovq\t48(%rsp),%r14\n\n\tmovq\t56(%rsp),%r13\n\n\tmovq\t64(%rsp),%r12\n\n\tmovq\t72(%rsp),%rbx\n\n\tmovq\t80(%rsp),%rbp\n\n\tleaq\t88(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_n_mul_mont_384:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_n_mul_mont_384:\n\n.globl\tsqrx_n_mul_mont_383\n\n.def\tsqrx_n_mul_mont_383;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t5\nsqrx_n_mul_mont_383:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_n_mul_mont_383:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\n\tmovq\t40(%rsp),%r8\n\tmovq\t48(%rsp),%r9\nsqr_n_mul_mont_383$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tleaq\t-40(%rsp),%rsp\n\n.LSEH_body_sqrx_n_mul_mont_383:\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\tleaq\t-128(%rcx),%rcx\n\n.Loop_sqrx_383:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\t.Loop_sqrx_383\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t40(%rsp),%r15\n\n\tmovq\t48(%rsp),%r14\n\n\tmovq\t56(%rsp),%r13\n\n\tmovq\t64(%rsp),%r12\n\n\tmovq\t72(%rsp),%rbx\n\n\tmovq\t80(%rsp),%rbp\n\n\tleaq\t88(%rsp),%rsp\n\n.LSEH_epilogue_sqrx_n_mul_mont_383:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_n_mul_mont_383:\n.def\t__mulx_mont_383_nonred;\t.scl 3;\t.type 
32;\t.endef\n.p2align\t5\n__mulx_mont_383_nonred:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\tmovq\t%r8,%rax\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%r15,%rbp\n\tadoxq\t%rbp,%r15\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%rax,%r14\n\tadoxq\t%rax,%r15\n\tadcxq\t%rax,%r15\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rdx\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rax,%rbp\n\tadoxq\t%rbp,%rax\n\n\n\txorq\t%r9,%
r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%r8,%rbp\n\tadoxq\t%rbp,%r8\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r11,%r10\n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rs
i),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%r9,%rbp\n\tadoxq\t%rbp,%r9\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%r10,%rbp\n\tadoxq\t%rbp,%r10\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\ta
doxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r14,%rdx\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tadcq\t$0,%r10\n\tmovq\t%r8,%r12\n\n\tmovq\t%r14,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r9,%rdi\n\tmovq\t%r8,24(%rbx)\n\tmovq\t%r9,32(%rbx)\n\tmovq\t%r10,40(%rbx)\n\tmovq\t%r10,%rbp\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tsqrx_mont_382x\n\n.def\tsqrx_mont_382x;\t.scl 2;\t.type 32;\t.endef\n.p2align\t5\nsqrx_mont_382x:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_sqrx_mont_382x:\n\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rcx\nsqr_mont_382x$1:\n\tpushq\t%rbp\n\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tsubq\t$136,%rsp\n\n.LSEH_body_sqrx_mont_382x:\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48
(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tadcq\t%r12,%r12\n\tadcq\t%rdi,%rdi\n\tadcq\t%rbp,%rbp\n\n\tmovq\t%rdx,48(%rbx)\n\tmovq\t%r15,56(%rbx)\n\tmovq\t%rax,64(%rbx)\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32-128(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\n\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\tmovq\t32+96(%rsp),%r14\n\tleaq\t128(%rcx),%rcx\n\tmovq\t32+0(%rsp),%r8\n\tandq\t%r14,%r8\n\tmovq\t32+8(%rsp),%r9\n\tandq\t%r14,%r9\n\tmovq\t32+16(%rsp),%r10\n\tandq\t%r14,%r10\n\tmovq\t32+24(%rsp),%r11\n\tandq\t%r14,%r11\n\tmovq\t32+32(%rsp),%r13\n\tandq\t%r14,%r13\n\tandq\t32+40(%rsp),%r14\n\n\tsubq\t%r8,%rdx\n\tmovq\t0(%rcx),%r8\n\tsbbq\t%r9,%r15\n\tmovq\t8(%rcx),%r9\n\tsbbq\t%r10,%rax\n\tmovq\t16(%rcx),%r10\n\tsbbq\t%r11,%r12\n\tmovq\t24(%rcx),%r11\n\tsbbq\t%r13,%rdi\n\tmovq\t32(%rcx),%r13\n\tsbbq\t%r14,%rbp\n\tsbbq\t%r14,%r14\n\n\tandq\t%r14,%r8\n\tandq\t%r14,%r9\n\tandq\t%r14,%r10\n\tandq\t%r14,%r11\n\tandq\t%r14,%r13\n\tandq\t40(%rcx),%r14\n\n\taddq\t%r8,%rdx\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%r12\n\tadcq\t%r13,%rdi\n\tadcq\t%r14,%rbp\n\n\tmovq\t%rdx,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n\n\tmovq\t8(%r8),%r14\n\n\tmovq\t1
6(%r8),%r13\n\n\tmovq\t24(%r8),%r12\n\n\tmovq\t32(%r8),%rbx\n\n\tmovq\t40(%r8),%rbp\n\n\tleaq\t48(%r8),%rsp\n\n.LSEH_epilogue_sqrx_mont_382x:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_sqrx_mont_382x:\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_mulx_mont_384x\n.rva\t.LSEH_body_mulx_mont_384x\n.rva\t.LSEH_info_mulx_mont_384x_prologue\n\n.rva\t.LSEH_body_mulx_mont_384x\n.rva\t.LSEH_epilogue_mulx_mont_384x\n.rva\t.LSEH_info_mulx_mont_384x_body\n\n.rva\t.LSEH_epilogue_mulx_mont_384x\n.rva\t.LSEH_end_mulx_mont_384x\n.rva\t.LSEH_info_mulx_mont_384x_epilogue\n\n.rva\t.LSEH_begin_sqrx_mont_384x\n.rva\t.LSEH_body_sqrx_mont_384x\n.rva\t.LSEH_info_sqrx_mont_384x_prologue\n\n.rva\t.LSEH_body_sqrx_mont_384x\n.rva\t.LSEH_epilogue_sqrx_mont_384x\n.rva\t.LSEH_info_sqrx_mont_384x_body\n\n.rva\t.LSEH_epilogue_sqrx_mont_384x\n.rva\t.LSEH_end_sqrx_mont_384x\n.rva\t.LSEH_info_sqrx_mont_384x_epilogue\n\n.rva\t.LSEH_begin_mulx_382x\n.rva\t.LSEH_body_mulx_382x\n.rva\t.LSEH_info_mulx_382x_prologue\n\n.rva\t.LSEH_body_mulx_382x\n.rva\t.LSEH_epilogue_mulx_382x\n.rva\t.LSEH_info_mulx_382x_body\n\n.rva\t.LSEH_epilogue_mulx_382x\n.rva\t.LSEH_end_mulx_382x\n.rva\t.LSEH_info_mulx_382x_epilogue\n\n.rva\t.LSEH_begin_sqrx_382x\n.rva\t.LSEH_body_sqrx_382x\n.rva\t.LSEH_info_sqrx_382x_prologue\n\n.rva\t.LSEH_body_sqrx_382x\n.rva\t.LSEH_epilogue_sqrx_382x\n.rva\t.LSEH_info_sqrx_382x_body\n\n.rva\t.LSEH_epilogue_sqrx_382x\n.rva\t.LSEH_end_sqrx_382x\n.rva\t.LSEH_info_sqrx_382x_epilogue\n\n.rva\t.LSEH_begin_mulx_384\n.rva\t.LSEH_body_mulx_384\n.rva\t.LSEH_info_mulx_384_prologue\n\n.rva\t.LSEH_body_mulx_384\n.rva\t.LSEH_epilogue_mulx_384\n.rva\t.LSEH_info_mulx_384_body\n\n.rva\t.LSEH_epilogue_mulx_384\n.rva\t.LSEH_end_mulx_384\n.rva\t.LSEH_info_mulx_384_epilogue\n\n.rva\t.LSEH_begin_sqrx_384\n.rva\t.LSEH_body_sqrx_384\n.rva\t.LSEH_info_sqrx_384_prologue\n\n.rva\t.LSEH_body_
sqrx_384\n.rva\t.LSEH_epilogue_sqrx_384\n.rva\t.LSEH_info_sqrx_384_body\n\n.rva\t.LSEH_epilogue_sqrx_384\n.rva\t.LSEH_end_sqrx_384\n.rva\t.LSEH_info_sqrx_384_epilogue\n\n.rva\t.LSEH_begin_redcx_mont_384\n.rva\t.LSEH_body_redcx_mont_384\n.rva\t.LSEH_info_redcx_mont_384_prologue\n\n.rva\t.LSEH_body_redcx_mont_384\n.rva\t.LSEH_epilogue_redcx_mont_384\n.rva\t.LSEH_info_redcx_mont_384_body\n\n.rva\t.LSEH_epilogue_redcx_mont_384\n.rva\t.LSEH_end_redcx_mont_384\n.rva\t.LSEH_info_redcx_mont_384_epilogue\n\n.rva\t.LSEH_begin_fromx_mont_384\n.rva\t.LSEH_body_fromx_mont_384\n.rva\t.LSEH_info_fromx_mont_384_prologue\n\n.rva\t.LSEH_body_fromx_mont_384\n.rva\t.LSEH_epilogue_fromx_mont_384\n.rva\t.LSEH_info_fromx_mont_384_body\n\n.rva\t.LSEH_epilogue_fromx_mont_384\n.rva\t.LSEH_end_fromx_mont_384\n.rva\t.LSEH_info_fromx_mont_384_epilogue\n\n.rva\t.LSEH_begin_sgn0x_pty_mont_384\n.rva\t.LSEH_body_sgn0x_pty_mont_384\n.rva\t.LSEH_info_sgn0x_pty_mont_384_prologue\n\n.rva\t.LSEH_body_sgn0x_pty_mont_384\n.rva\t.LSEH_epilogue_sgn0x_pty_mont_384\n.rva\t.LSEH_info_sgn0x_pty_mont_384_body\n\n.rva\t.LSEH_epilogue_sgn0x_pty_mont_384\n.rva\t.LSEH_end_sgn0x_pty_mont_384\n.rva\t.LSEH_info_sgn0x_pty_mont_384_epilogue\n\n.rva\t.LSEH_begin_sgn0x_pty_mont_384x\n.rva\t.LSEH_body_sgn0x_pty_mont_384x\n.rva\t.LSEH_info_sgn0x_pty_mont_384x_prologue\n\n.rva\t.LSEH_body_sgn0x_pty_mont_384x\n.rva\t.LSEH_epilogue_sgn0x_pty_mont_384x\n.rva\t.LSEH_info_sgn0x_pty_mont_384x_body\n\n.rva\t.LSEH_epilogue_sgn0x_pty_mont_384x\n.rva\t.LSEH_end_sgn0x_pty_mont_384x\n.rva\t.LSEH_info_sgn0x_pty_mont_384x_epilogue\n\n.rva\t.LSEH_begin_mulx_mont_384\n.rva\t.LSEH_body_mulx_mont_384\n.rva\t.LSEH_info_mulx_mont_384_prologue\n\n.rva\t.LSEH_body_mulx_mont_384\n.rva\t.LSEH_epilogue_mulx_mont_384\n.rva\t.LSEH_info_mulx_mont_384_body\n\n.rva\t.LSEH_epilogue_mulx_mont_384\n.rva\t.LSEH_end_mulx_mont_384\n.rva\t.LSEH_info_mulx_mont_384_epilogue\n\n.rva\t.LSEH_begin_sqrx_mont_384\n.rva\t.LSEH_body_sqrx_mont_384\n.rva\t.LSEH_info_sqrx_m
ont_384_prologue\n\n.rva\t.LSEH_body_sqrx_mont_384\n.rva\t.LSEH_epilogue_sqrx_mont_384\n.rva\t.LSEH_info_sqrx_mont_384_body\n\n.rva\t.LSEH_epilogue_sqrx_mont_384\n.rva\t.LSEH_end_sqrx_mont_384\n.rva\t.LSEH_info_sqrx_mont_384_epilogue\n\n.rva\t.LSEH_begin_sqrx_n_mul_mont_384\n.rva\t.LSEH_body_sqrx_n_mul_mont_384\n.rva\t.LSEH_info_sqrx_n_mul_mont_384_prologue\n\n.rva\t.LSEH_body_sqrx_n_mul_mont_384\n.rva\t.LSEH_epilogue_sqrx_n_mul_mont_384\n.rva\t.LSEH_info_sqrx_n_mul_mont_384_body\n\n.rva\t.LSEH_epilogue_sqrx_n_mul_mont_384\n.rva\t.LSEH_end_sqrx_n_mul_mont_384\n.rva\t.LSEH_info_sqrx_n_mul_mont_384_epilogue\n\n.rva\t.LSEH_begin_sqrx_n_mul_mont_383\n.rva\t.LSEH_body_sqrx_n_mul_mont_383\n.rva\t.LSEH_info_sqrx_n_mul_mont_383_prologue\n\n.rva\t.LSEH_body_sqrx_n_mul_mont_383\n.rva\t.LSEH_epilogue_sqrx_n_mul_mont_383\n.rva\t.LSEH_info_sqrx_n_mul_mont_383_body\n\n.rva\t.LSEH_epilogue_sqrx_n_mul_mont_383\n.rva\t.LSEH_end_sqrx_n_mul_mont_383\n.rva\t.LSEH_info_sqrx_n_mul_mont_383_epilogue\n\n.rva\t.LSEH_begin_sqrx_mont_382x\n.rva\t.LSEH_body_sqrx_mont_382x\n.rva\t.LSEH_info_sqrx_mont_382x_prologue\n\n.rva\t.LSEH_body_sqrx_mont_382x\n.rva\t.LSEH_epilogue_sqrx_mont_382x\n.rva\t.LSEH_info_sqrx_mont_382x_body\n\n.rva\t.LSEH_epilogue_sqrx_mont_382x\n.rva\t.LSEH_end_sqrx_mont_382x\n.rva\t.LSEH_info_sqrx_mont_382x_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_mulx_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mulx_mont_384x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x29,0x00\n.byte\t0x00,0xe4,0x2a,0x00\n.byte\t0x00,0xd4,0x2b,0x00\n.byte\t0x00,0xc4,0x2c,0x00\n.byte\t0x00,0x34,0x2d,0x00\n.byte\t0x00,0x54,0x2e,0x00\n.byte\t0x00,0x74,0x30,0x00\n.byte\t0x00,0x64,0x31,0x00\n.byte\t0x00,0x01,0x2f,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mulx_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info
_sqrx_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_mont_384x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mulx_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mulx_382x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mulx_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_382x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mulx_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mulx_384_bod
y:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x00,0x00\n.byte\t0x00,0xe4,0x01,0x00\n.byte\t0x00,0xd4,0x02,0x00\n.byte\t0x00,0xc4,0x03,0x00\n.byte\t0x00,0x34,0x04,0x00\n.byte\t0x00,0x54,0x05,0x00\n.byte\t0x00,0x74,0x07,0x00\n.byte\t0x00,0x64,0x08,0x00\n.byte\t0x00,0x52\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mulx_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_redcx_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_redcx_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_redcx_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_fromx_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_fromx_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.
byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_fromx_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0x_pty_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0x_pty_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0x_pty_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sgn0x_pty_mont_384x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sgn0x_pty_mont_384x_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x01,0x00\n.byte\t0x00,0xe4,0x02,0x00\n.byte\t0x00,0xd4,0x03,0x00\n.byte\t0x00,0xc4,0x04,0x00\n.byte\t0x00,0x34,0x05,0x00\n.byte\t0x00,0x54,0x06,0x00\n.byte\t0x00,0x74,0x08,0x00\n.byte\t0x00,0x64,0x09,0x00\n.byte\t0x00,0x62\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sgn0x_pty_mont_384x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_mulx_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_mulx_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x03,0x00\n.byte\t0x00,0xe4,0x04,0x00\n.byte\t0x00,0xd4,0x05,0x00\n.byte\t0x00,0xc4,0x06,0x00\n.byte\t0x00,0x34,0x07,0x00\n.byte\t0x00,0x54,0x08,0x00\n.byte\t0x00,0x74,0x0a,0x00\n.b
yte\t0x00,0x64,0x0b,0x00\n.byte\t0x00,0x82\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_mulx_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x03,0x00\n.byte\t0x00,0xe4,0x04,0x00\n.byte\t0x00,0xd4,0x05,0x00\n.byte\t0x00,0xc4,0x06,0x00\n.byte\t0x00,0x34,0x07,0x00\n.byte\t0x00,0x54,0x08,0x00\n.byte\t0x00,0x74,0x0a,0x00\n.byte\t0x00,0x64,0x0b,0x00\n.byte\t0x00,0x82\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_n_mul_mont_384_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_n_mul_mont_384_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x05,0x00\n.byte\t0x00,0xe4,0x06,0x00\n.byte\t0x00,0xd4,0x07,0x00\n.byte\t0x00,0xc4,0x08,0x00\n.byte\t0x00,0x34,0x09,0x00\n.byte\t0x00,0x54,0x0a,0x00\n.byte\t0x00,0x74,0x0c,0x00\n.byte\t0x00,0x64,0x0d,0x00\n.byte\t0x00,0xa2\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_n_mul_mont_384_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_n_mul_mont_383_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_n_mul_mont_383_body:\n.byte\t1,0,17,0\n.byte\t0x00,0xf4,0x05,0x00\n.byte\t0x00,0xe4,0x06,0x00\n.byte\t0x00,0xd4,0x07,0x00\n.byte\t0x00,0xc4,0x08,0x00\n.byte\t0x00,0x34,0x09,0x00\n.byte\t0x00,0x54,0x0a,0x00\n.byte\t0x00,0x74,0x0c,0x00\n.byte\t0x00,0x64,0x0d,0x00\n.byte\t0x00,0xa2\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t
0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_n_mul_mont_383_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_sqrx_mont_382x_prologue:\n.byte\t1,0,5,0x0b\n.byte\t0,0x74,1,0\n.byte\t0,0x64,2,0\n.byte\t0,0xb3\n.byte\t0,0\n.long\t0,0\n.LSEH_info_sqrx_mont_382x_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x11,0x00\n.byte\t0x00,0xe4,0x12,0x00\n.byte\t0x00,0xd4,0x13,0x00\n.byte\t0x00,0xc4,0x14,0x00\n.byte\t0x00,0x34,0x15,0x00\n.byte\t0x00,0x54,0x16,0x00\n.byte\t0x00,0x74,0x18,0x00\n.byte\t0x00,0x64,0x19,0x00\n.byte\t0x00,0x01,0x17,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_sqrx_mont_382x_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/sha256-armv8.S",
    "content": "//\n// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n//\n// ====================================================================\n// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n// project.\n// ====================================================================\n//\n// sha256_block procedure for ARMv8.\n//\n// This module is stripped of scalar code paths, with rationale that all\n// known processors are NEON-capable.\n//\n// See original module at CRYPTOGAMS for further details.\n\n.comm\t__blst_platform_cap,4\n.text\n\n.p2align\t6\n\n.LK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n.long\t0\t//terminator\n\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.align\t2\n.p2align\t2\n.globl\tblst_sha256_block_armv8\n\n.def\tblst_sha256_block_armv8;\n.type\t32;\n.endef\n.p2align\t6\nblst_sha256_block_armv8:\n\thint\t#34\n.Lv8_entry:\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tld1\t{v0.4s,v1.
4s},[x0]\n\tadr\tx3,.LK256\n\n.Loop_hw:\n\tld1\t{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64\n\tsub\tx2,x2,#1\n\tld1\t{v16.4s},[x3],#16\n\trev32\tv4.16b,v4.16b\n\trev32\tv5.16b,v5.16b\n\trev32\tv6.16b,v6.16b\n\trev32\tv7.16b,v7.16b\n\torr\tv18.16b,v0.16b,v0.16b\t\t// offload\n\torr\tv19.16b,v1.16b,v1.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 
v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 
v1.16b,v2.16b,v16.4s\n\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tld1\t{v17.4s},[x3]\n\tadd\tv16.4s,v16.4s,v6.4s\n\tsub\tx3,x3,#64*4-16\t// rewind\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tadd\tv17.4s,v17.4s,v7.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tadd\tv0.4s,v0.4s,v18.4s\n\tadd\tv1.4s,v1.4s,v19.4s\n\n\tcbnz\tx2,.Loop_hw\n\n\tst1\t{v0.4s,v1.4s},[x0]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\tret\n\n.globl\tblst_sha256_block_data_order\n\n.def\tblst_sha256_block_data_order;\n.type\t32;\n.endef\n.p2align\t4\nblst_sha256_block_data_order:\n\thint\t#34\n\tadrp\tx16,__blst_platform_cap\n\tldr\tw16,[x16,#:lo12:__blst_platform_cap]\n\ttst\tw16,#1\n\tb.ne\t.Lv8_entry\n\n\tstp\tx29, x30, [sp, #-2*__SIZEOF_POINTER__]!\n\tmov\tx29, sp\n\tsub\tsp,sp,#16*4\n\n\tadr\tx16,.LK256\n\tadd\tx2,x1,x2,lsl#6\t// len to point at the end of inp\n\n\tld1\t{v0.16b},[x1], #16\n\tld1\t{v1.16b},[x1], #16\n\tld1\t{v2.16b},[x1], #16\n\tld1\t{v3.16b},[x1], #16\n\tld1\t{v4.4s},[x16], #16\n\tld1\t{v5.4s},[x16], #16\n\tld1\t{v6.4s},[x16], #16\n\tld1\t{v7.4s},[x16], #16\n\trev32\tv0.16b,v0.16b\t\t// yes, even on\n\trev32\tv1.16b,v1.16b\t\t// big-endian\n\trev32\tv2.16b,v2.16b\n\trev32\tv3.16b,v3.16b\n\tmov\tx17,sp\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tv5.4s,v5.4s,v1.4s\n\tadd\tv6.4s,v6.4s,v2.4s\n\tst1\t{v4.4s,v5.4s},[x17], 
#32\n\tadd\tv7.4s,v7.4s,v3.4s\n\tst1\t{v6.4s,v7.4s},[x17]\n\tsub\tx17,x17,#32\n\n\tldp\tw3,w4,[x0]\n\tldp\tw5,w6,[x0,#8]\n\tldp\tw7,w8,[x0,#16]\n\tldp\tw9,w10,[x0,#24]\n\tldr\tw12,[sp,#0]\n\tmov\tw13,wzr\n\teor\tw14,w4,w5\n\tmov\tw15,wzr\n\tb\t.L_00_48\n\n.p2align\t4\n.L_00_48:\n\text\tv4.16b,v0.16b,v1.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v2.16b,v3.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v3.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv0.4s,v0.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv0.4s,v0.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv0.4s,v0.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v0.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v0.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v0.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v0.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8
,w8,w11\n\tsli\tv17.4s,v0.4s,#13\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv0.4s,v0.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v1.16b,v2.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v3.16b,v0.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v0.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv1.4s,v1.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv1.4s,v1.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12
\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv1.4s,v1.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\tushr\tv18.4s,v1.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v1.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v1.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v1.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v1.4s,#13\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv1.4s,v1.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\text\tv4.16b,v2.16b,v3.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v0.16b,v1.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v1.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv2.4s,v2.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv2.4s,v2.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv2.4s,v2.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v2.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v2.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v2.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v2.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tsli\tv17.4s,v2.4s,#13\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], 
#16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv2.4s,v2.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v3.16b,v0.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v1.16b,v2.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v2.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv3.4s,v3.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv3.4s,v3.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv3.4s,v3.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw
5,w5,w13\n\tushr\tv18.4s,v3.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v3.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v3.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v3.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v3.4s,#13\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv3.4s,v3.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[x16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tcmp\tw12,#0\t\t\t\t// check for K256 terminator\n\tldr\tw12,[sp,#0]\n\tsub\tx17,x17,#64\n\tbne\t.L_00_48\n\n\tsub\tx16,x16,#256\n\tcmp\tx1,x2\n\tmov\tx17, #-64\n\tcsel\tx17, x17, xzr, 
eq\n\tadd\tx1,x1,x17\n\tmov\tx17,sp\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v0.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv0.16b,v0.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v1.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv1.16b,v1.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v2.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv2.16b,v2.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v3.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv3.16b,v3.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tadd\tw3,w3,w15\t\t\t// h+=Sigma0(a) from the past\n\tldp\tw11,w12,[x0,#0]\n\tadd\tw3,w3,w13\t\t\t// h+=Maj(a,b,c) from the past\n\tldp\tw13,w14,[x0,#8]\n\tadd\tw3,w3,w11\t\t\t// 
accumulate\n\tadd\tw4,w4,w12\n\tldp\tw11,w12,[x0,#16]\n\tadd\tw5,w5,w13\n\tadd\tw6,w6,w14\n\tldp\tw13,w14,[x0,#24]\n\tadd\tw7,w7,w11\n\tadd\tw8,w8,w12\n\tldr\tw12,[sp,#0]\n\tstp\tw3,w4,[x0,#0]\n\tadd\tw9,w9,w13\n\tmov\tw13,wzr\n\tstp\tw5,w6,[x0,#8]\n\tadd\tw10,w10,w14\n\tstp\tw7,w8,[x0,#16]\n\teor\tw14,w4,w5\n\tstp\tw9,w10,[x0,#24]\n\tmov\tw15,wzr\n\tmov\tx17,sp\n\tb.ne\t.L_00_48\n\n\tldr\tx29,[x29]\n\tadd\tsp,sp,#16*4+2*__SIZEOF_POINTER__\n\tret\n\n.globl\tblst_sha256_emit\n\n.def\tblst_sha256_emit;\n.type\t32;\n.endef\n.p2align\t4\nblst_sha256_emit:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n#ifndef\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n#endif\n\tstr\tw4,[x0,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[x0,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[x0,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[x0,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[x0,#0]\n\tstr\tw5,[x0,#8]\n\tstr\tw6,[x0,#16]\n\tstr\tw7,[x0,#24]\n\tret\n\n\n.globl\tblst_sha256_bcopy\n\n.def\tblst_sha256_bcopy;\n.type\t32;\n.endef\n.p2align\t4\nblst_sha256_bcopy:\n\thint\t#34\n.Loop_bcopy:\n\tldrb\tw3,[x1],#1\n\tsub\tx2,x2,#1\n\tstrb\tw3,[x0],#1\n\tcbnz\tx2,.Loop_bcopy\n\tret\n\n\n.globl\tblst_sha256_hcopy\n\n.def\tblst_sha256_hcopy;\n.type\t32;\n.endef\n.p2align\t4\nblst_sha256_hcopy:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tstp\tx4,x5,[x0]\n\tstp\tx6,x7,[x0,#16]\n\tret\n\n"
  },
  {
    "path": "build/coff/sha256-portable-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tblst_sha256_block_data_order\n.def\tblst_sha256_block_data_order;\t.scl 2;\t.type 32;\t.endef\n.p2align\t4\nblst_sha256_block_data_order:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_blst_sha256_block_data_order:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rsp,%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifdef __BLST_PORTABLE__\n\ttestl\t$2,__blst_platform_cap(%rip)\n\tjnz\t.Lblst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tshlq\t$4,%rdx\n\tsubq\t$64+24,%rsp\n\n\n.LSEH_body_blst_sha256_block_data_order:\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,64+0(%rsp)\n\tmovq\t%rsi,64+8(%rsp)\n\tmovq\t%rdx,64+16(%rsp)\n\n\tmovl\t0(%rdi),%eax\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\tjmp\t.Lloop\n\n.p2align\t4\n.Lloop:\n\tmovl\t%ebx,%edi\n\tleaq\tK256(%rip),%rbp\n\txorl\t%ecx,%edi\n\tmovl\t0(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t0(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t4(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\
trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t4(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t8(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t8(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t12(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t12(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t16(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r
13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t16(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t20(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t20(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r14d,%ecx\n\tmovl\t24(%rsi),%r12d\n\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t24(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t28(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t28(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\
n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r14d,%eax\n\tmovl\t32(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t32(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t36(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t36(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t40(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t40(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\
n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t44(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t44(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t48(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t48(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t52(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t52(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx
\n\taddl\t%r14d,%ecx\n\tmovl\t56(%rsi),%r12d\n\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t56(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t60(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t60(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tjmp\t.Lrounds_16_xx\n.p2align\t4\n.Lrounds_16_xx:\n\tmovl\t4(%rsp),%r13d\n\tmovl\t56(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t36(%rsp),%r12d\n\n\taddl\t0(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txor
l\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t64(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t8(%rsp),%r13d\n\tmovl\t60(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t40(%rsp),%r12d\n\n\taddl\t4(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t68(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t12(%rsp),%r13d\n\tmovl\t0(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t44(%rsp),%r12d\n\n\taddl\t8(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t72(%rbp),%r12d\n\txorl\t%
r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t16(%rsp),%r13d\n\tmovl\t4(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t48(%rsp),%r12d\n\n\taddl\t12(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t76(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t20(%rsp),%r13d\n\tmovl\t8(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t52(%rsp),%r12d\n\n\taddl\t16(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t80(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl
\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t24(%rsp),%r13d\n\tmovl\t12(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t56(%rsp),%r12d\n\n\taddl\t20(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t84(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t28(%rsp),%r13d\n\tmovl\t16(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t60(%rsp),%r12d\n\n\taddl\t24(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t88(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%e
bx\n\tmovl\t32(%rsp),%r13d\n\tmovl\t20(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t0(%rsp),%r12d\n\n\taddl\t28(%rsp),%r12d\n\tmovl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t92(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tmovl\t36(%rsp),%r13d\n\tmovl\t24(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t4(%rsp),%r12d\n\n\taddl\t32(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t96(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t40(%rsp),%r13d\n\tmovl\t28(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\
taddl\t%r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t8(%rsp),%r12d\n\n\taddl\t36(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t100(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t44(%rsp),%r13d\n\tmovl\t32(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t12(%rsp),%r12d\n\n\taddl\t40(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t104(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t48(%rsp),%r13d\n\tmovl\t36(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshr
l\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t16(%rsp),%r12d\n\n\taddl\t44(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t108(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t52(%rsp),%r13d\n\tmovl\t40(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t20(%rsp),%r12d\n\n\taddl\t48(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t112(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t56(%rsp),%r13d\n\tmovl\t44(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl
\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t24(%rsp),%r12d\n\n\taddl\t52(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t116(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t60(%rsp),%r13d\n\tmovl\t48(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t28(%rsp),%r12d\n\n\taddl\t56(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t120(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\tmovl\t0(%rsp),%r13d\n\tmovl\t52(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t32(%rsp),%r12d\n\n\taddl\t60(%rsp),%r12d\n\tmov
l\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t124(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tleaq\t64(%rbp),%rbp\n\tcmpb\t$0x19,3(%rbp)\n\tjnz\t.Lrounds_16_xx\n\n\tmovq\t64+0(%rsp),%rdi\n\taddl\t%r14d,%eax\n\tleaq\t64(%rsi),%rsi\n\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tcmpq\t64+16(%rsp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\t.Lloop\n\n\tleaq\t64+24+48(%rsp),%r11\n\n\tmovq\t64+24(%rsp),%r15\n\tmovq\t-40(%r11),%r14\n\tmovq\t-32(%r11),%r13\n\tmovq\t-24(%r11),%r12\n\tmovq\t-16(%r11),%rbx\n\tmovq\t-8(%r11),%rbp\n.LSEH_epilogue_blst_sha256_block_data_order:\n\tmov\t8(%r11),%rdi\n\tmov\t16(%r11),%rsi\n\n\tleaq\t(%r11),%rsp\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_blst_sha256_block_data_order:\n\n#ifndef 
__BLST_PORTABLE__\n.section\t.rdata\n.p2align\t6\n\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.globl\tblst_sha256_emit\n\n.def\tblst_sha256_emit;\t.scl 2;\t.type 32;\t.endef\n.p2align\t4\nblst_sha256_emit:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rdx),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rcx)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rcx)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rcx)\n\tshrq\t$32,%r8\n\tmovl\t%r11d,28(%rcx)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rcx)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rcx)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rcx)\n\tmovl\t%r11d,24(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tblst_sha256_bcopy\n\n.def\tblst_sha256_bcopy;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t4\nblst_sha256_bcopy:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tsubq\t%rdx,%rcx\n.Loop_bcopy:\n\tmovzbl\t(%rdx),%eax\n\tleaq\t1(%rdx),%rdx\n\tmovb\t%al,-1(%rcx,%rdx,1)\n\tdecq\t%r8\n\tjnz\t.Loop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tblst_sha256_hcopy\n\n.def\tblst_sha256_hcopy;\t.scl 2;\t.type 32;\t.endef\n.p2align\t4\nblst_sha256_hcopy:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t%r8,0(%rcx)\n\tmovq\t%r9,8(%rcx)\n\tmovq\t%r10,16(%rcx)\n\tmovq\t%r11,24(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n#endif\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_blst_sha256_block_data_order\n.rva\t.LSEH_body_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_prologue\n\n.rva\t.LSEH_body_blst_sha256_block_data_order\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_body\n\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order\n.rva\t.LSEH_end_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_blst_sha256_block_data_order_prologue:\n.byte\t1,4,6,0x05\n.byte\t4,0x74,2,0\n.byte\t4,0x64,3,0\n.byte\t4,0x53\n.byte\t1,0x50\n.long\t0,0\n.LSEH_info_blst_sha256_block_data_order_body:\n.byte\t1,0,18,0\n.byte\t0x00,0xf4,0x0b,0x00\n.byte\t0x00,0xe4,0x0c,0x00\n.byte\t0x00,0xd4,0x0d,0x00\n.byte\t0x00,0xc4,0x0e,0x00\n.byte\t0x00,0x34,0x0f,0x00\n.byte\t0x00,0x54,0x10,0x00\n.byte\t0x00,0x74,0x12,0x00\n.byte\t0x00,0x64,0x13,0x00\n.byte\t0x00,0x01,0x11,0x00\n.byte\t0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_blst_sha256_block_data_order_epilogue:\n.byte\t1,0,5,11\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0xb3\n.byte\t0x00,0x00,0x00,0x00
,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/coff/sha256-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n\n.section\t.rdata\n.p2align\t6\n\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.long\t0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f\n.long\t0x03020100,0x0b0a0908,0xffffffff,0xffffffff\n.long\t0xffffffff,0xffffffff,0x03020100,0x0b0a0908\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.text\t\n.globl\tblst_sha256_block_data_order_shaext\n\n.def\tblst_sha256_block_data_order_shaext;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t6\nblst_sha256_block_data_order_shaext:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_blst_sha256_block_data_order_shaext:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rsp,%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n.Lblst_sha256_block_data_order$2:\n\tsubq\t$0x50,%rsp\n\n\tmovaps\t%xmm6,-80(%rbp)\n\tmovaps\t%xmm7,-64(%rbp)\n\tmovaps\t%xmm8,-48(%rbp)\n\tmovaps\t%xmm9,-32(%rbp)\n\tmovaps\t%xmm10,-16(%rbp)\n\n.LSEH_body_blst_sha256_block_data_order_shaext:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tleaq\tK256+128(%rip),%rcx\n\tmovdqu\t(%rdi),%xmm1\n\tmovdqu\t16(%rdi),%xmm2\n\tmovdqa\t256-128(%rcx),%xmm7\n\n\tpshufd\t$0x1b,%xmm1,%xmm0\n\tpshufd\t$0xb1,%xmm1,%xmm1\n\tpshufd\t$0x1b,%xmm2,%xmm2\n\tmovdqa\t%xmm7,%xmm8\n.byte\t102,15,58,15,202,8\n\tpunpcklqdq\t%xmm0,%xmm2\n\tjmp\t.Loop_shaext\n\n.p2align\t4\n.Loop_shaext:\n\tmovdqu\t(%rsi),%xmm3\n\tmovdqu\t16(%rsi),%xmm4\n\tmovdqu\t32(%rsi),%xmm5\n.byte\t102,15,56,0,223\n\tmovdqu\t48(%rsi),%xmm6\n\n\tmovdqa\t0-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t102,15,56,0,231\n\tmovdqa\t%xmm2,%xmm10\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tnop\n\tmovdqa\t%xmm1,%xmm9\n.byte\t15,56,203,202\n\n\tmovdqa\t16-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t102,15,56,0,239\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tleaq\t64(%rsi),%rsi\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\n\tmovdqa\t32-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t102,15,56,0,247\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\n\tmovdqa\t48-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t64-128(%rcx),%xmm0\n\tpaddd\t%xmm3
,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t80-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t96-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t112-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t128-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t144-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t160-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t176-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t192-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.b
yte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t208-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n.byte\t15,56,203,202\n\tpaddd\t%xmm7,%xmm6\n\n\tmovdqa\t224-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n.byte\t15,56,205,245\n\tmovdqa\t%xmm8,%xmm7\n.byte\t15,56,203,202\n\n\tmovdqa\t240-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n\tnop\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tdecq\t%rdx\n\tnop\n.byte\t15,56,203,202\n\n\tpaddd\t%xmm10,%xmm2\n\tpaddd\t%xmm9,%xmm1\n\tjnz\t.Loop_shaext\n\n\tpshufd\t$0xb1,%xmm2,%xmm2\n\tpshufd\t$0x1b,%xmm1,%xmm7\n\tpshufd\t$0xb1,%xmm1,%xmm1\n\tpunpckhqdq\t%xmm2,%xmm1\n.byte\t102,15,58,15,215,8\n\n\tmovdqu\t%xmm1,(%rdi)\n\tmovdqu\t%xmm2,16(%rdi)\n\tmovaps\t-80(%rbp),%xmm6\n\tmovaps\t-64(%rbp),%xmm7\n\tmovaps\t-48(%rbp),%xmm8\n\tmovaps\t-32(%rbp),%xmm9\n\tmovaps\t-16(%rbp),%xmm10\n\tmovq\t%rbp,%rsp\n\n\tpopq\t%rbp\n\n.LSEH_epilogue_blst_sha256_block_data_order_shaext:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_blst_sha256_block_data_order_shaext:\n.globl\tblst_sha256_block_data_order\n\n.def\tblst_sha256_block_data_order;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t6\nblst_sha256_block_data_order:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rsp,%r11\n.LSEH_begin_blst_sha256_block_data_order:\n\n\n\tpushq\t%rbp\n\n\tmovq\t%rsp,%rbp\n\n\tmovq\t%rcx,%rdi\n\tmovq\t%rdx,%rsi\n\tmovq\t%r8,%rdx\n#ifndef\t__SGX_LVI_HARDENING__\n\ttestl\t$2,__blst_platform_cap(%rip)\n\tjnz\t.Lblst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n\n\tpushq\t%r12\n\n\tpushq\t%r13\n\n\tpushq\t%r14\n\n\tpushq\t%r15\n\n\tshlq\t$4,%rdx\n\tsubq\t$88,%rsp\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,-64(%rbp)\n\n\tmovq\t%rdx,-48(%rbp)\n\tmovaps\t%xmm6,-128(%rbp)\n\tmovaps\t%xmm7,-112(%rbp)\n\tmovaps\t%xmm8,-96(%rbp)\n\tmovaps\t%xmm9,-80(%rbp)\n\n.LSEH_body_blst_sha256_block_data_order:\n\n\n\tleaq\t-64(%rsp),%rsp\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovl\t0(%rdi),%eax\n\tandq\t$-64,%rsp\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\n\n\tjmp\t.Lloop_ssse3\n.p2align\t4\n.Lloop_ssse3:\n\tmovdqa\tK256+256(%rip),%xmm7\n\tmovq\t%rsi,-56(%rbp)\n\tmovdqu\t0(%rsi),%xmm0\n\tmovdqu\t16(%rsi),%xmm1\n\tmovdqu\t32(%rsi),%xmm2\n.byte\t102,15,56,0,199\n\tmovdqu\t48(%rsi),%xmm3\n\tleaq\tK256(%rip),%rsi\n.byte\t102,15,56,0,207\n\tmovdqa\t0(%rsi),%xmm4\n\tmovdqa\t16(%rsi),%xmm5\n.byte\t102,15,56,0,215\n\tpaddd\t%xmm0,%xmm4\n\tmovdqa\t32(%rsi),%xmm6\n.byte\t102,15,56,0,223\n\tmovdqa\t48(%rsi),%xmm7\n\tpaddd\t%xmm1,%xmm5\n\tpaddd\t%xmm2,%xmm6\n\tpaddd\t%xmm3,%xmm7\n\tmovdqa\t%xmm4,0(%rsp)\n\tmovl\t%eax,%r14d\n\tmovdqa\t%xmm5,16(%rsp)\n\tmovl\t%ebx,%edi\n\tmovdqa\t%xmm6,32(%rsp)\n\txorl\t%ecx,%edi\n\tmovdqa\t%xmm7,48(%rsp)\n\tmovl\t%r8d,%r13d\n\tjmp\t.Lssse3_00_47\n\n.p2align\t4\n.Lssse3_00_47:\n\tsubq\t$-64,%rsi\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm1,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm3,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r1
3d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,224,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,250,4\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm3,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm0\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm0,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tp
srld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t0(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm0,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,0(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm2,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm0,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,225,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,251,4\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm0,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\
n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm1\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm1,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t16(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpaddd\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm1,%xmm6\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,16(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm3,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm1,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,226,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,
248,4\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm1,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm2\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm2,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx
,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t32(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm2,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,32(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm0,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm2,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,227,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,249,4\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm2,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\
t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm3\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm3,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t48(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm3,%xmm6\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,48(%rsp)\n\tcmpb\t$0,67(%rsi)\n\tjne\t.Lssse3_00_47\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,
%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d
,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r
15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%
r12d\n\txorl\t%eax,%r13d\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovq\t-64(%rbp),%rdi\n\tmovl\t%r14d,%eax\n\tmovq\t-56(%rbp),%rsi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#end
if\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tleaq\t64(%rsi),%rsi\n\tcmpq\t-48(%rbp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\t.Lloop_ssse3\n\n\txorps\t%xmm0,%xmm0\n\tmovaps\t%xmm0,0(%rsp)\n\tmovaps\t%xmm0,16(%rsp)\n\tmovaps\t%xmm0,32(%rsp)\n\tmovaps\t%xmm0,48(%rsp)\n\tmovaps\t-128(%rbp),%xmm6\n\tmovaps\t-112(%rbp),%xmm7\n\tmovaps\t-96(%rbp),%xmm8\n\tmovaps\t-80(%rbp),%xmm9\n\tmovq\t-40(%rbp),%r15\n\tmovq\t-32(%rbp),%r14\n\tmovq\t-24(%rbp),%r13\n\tmovq\t-16(%rbp),%r12\n\tmovq\t-8(%rbp),%rbx\n\tmovq\t%rbp,%rsp\n\n\tpopq\t%rbp\n\n.LSEH_epilogue_blst_sha256_block_data_order:\n\tmov\t8(%rsp),%rdi\n\tmov\t16(%rsp),%rsi\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.LSEH_end_blst_sha256_block_data_order:\n.globl\tblst_sha256_emit\n\n.def\tblst_sha256_emit;\t.scl 2;\t.type 32;\t.endef\n.p2align\t4\nblst_sha256_emit:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rdx),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rcx)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rcx)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rcx)\n\tshrq\t$32,%r8\n\tmovl\t%r11d,28(%rcx)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rcx)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rcx)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rcx)\n\tmovl\t%r11d,24(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tblst_sha256_bcopy\n\n.def\tblst_sha256_bcopy;\t.scl 2;\t.type 
32;\t.endef\n.p2align\t4\nblst_sha256_bcopy:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tsubq\t%rdx,%rcx\n.Loop_bcopy:\n\tmovzbl\t(%rdx),%eax\n\tleaq\t1(%rdx),%rdx\n\tmovb\t%al,-1(%rcx,%rdx,1)\n\tdecq\t%r8\n\tjnz\t.Loop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n\n.globl\tblst_sha256_hcopy\n\n.def\tblst_sha256_hcopy;\t.scl 2;\t.type 32;\t.endef\n.p2align\t4\nblst_sha256_hcopy:\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t%r8,0(%rcx)\n\tmovq\t%r9,8(%rcx)\n\tmovq\t%r10,16(%rcx)\n\tmovq\t%r11,24(%rcx)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n\n.section\t.pdata\n.p2align\t2\n.rva\t.LSEH_begin_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_body_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_info_blst_sha256_block_data_order_shaext_prologue\n\n.rva\t.LSEH_body_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_info_blst_sha256_block_data_order_shaext_body\n\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_end_blst_sha256_block_data_order_shaext\n.rva\t.LSEH_info_blst_sha256_block_data_order_shaext_epilogue\n\n.rva\t.LSEH_begin_blst_sha256_block_data_order\n.rva\t.LSEH_body_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_prologue\n\n.rva\t.LSEH_body_blst_sha256_block_data_order\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_body\n\n.rva\t.LSEH_epilogue_blst_sha256_block_data_order\n.rva\t.LSEH_end_blst_sha256_block_data_order\n.rva\t.LSEH_info_blst_sha256_block_data_order_epilogue\n\n.section\t.xdata\n.p2align\t3\n.LSEH_info_blst_sha256_block_data_order_shaext_prologue:\n.byte\t1,4,6,0x05\n
.byte\t4,0x74,2,0\n.byte\t4,0x64,3,0\n.byte\t4,0x53\n.byte\t1,0x50\n.long\t0,0\n.LSEH_info_blst_sha256_block_data_order_shaext_body:\n.byte\t1,0,17,85\n.byte\t0x00,0x68,0x00,0x00\n.byte\t0x00,0x78,0x01,0x00\n.byte\t0x00,0x88,0x02,0x00\n.byte\t0x00,0x98,0x03,0x00\n.byte\t0x00,0xa8,0x04,0x00\n.byte\t0x00,0x74,0x0c,0x00\n.byte\t0x00,0x64,0x0d,0x00\n.byte\t0x00,0x53\n.byte\t0x00,0x92\n.byte\t0x00,0x50\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_blst_sha256_block_data_order_shaext_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n.LSEH_info_blst_sha256_block_data_order_prologue:\n.byte\t1,4,6,0x05\n.byte\t4,0x74,2,0\n.byte\t4,0x64,3,0\n.byte\t4,0x53\n.byte\t1,0x50\n.long\t0,0\n.LSEH_info_blst_sha256_block_data_order_body:\n.byte\t1,0,25,133\n.byte\t0x00,0x68,0x00,0x00\n.byte\t0x00,0x78,0x01,0x00\n.byte\t0x00,0x88,0x02,0x00\n.byte\t0x00,0x98,0x03,0x00\n.byte\t0x00,0xf4,0x0b,0x00\n.byte\t0x00,0xe4,0x0c,0x00\n.byte\t0x00,0xd4,0x0d,0x00\n.byte\t0x00,0xc4,0x0e,0x00\n.byte\t0x00,0x34,0x0f,0x00\n.byte\t0x00,0x74,0x12,0x00\n.byte\t0x00,0x64,0x13,0x00\n.byte\t0x00,0x53\n.byte\t0x00,0xf2\n.byte\t0x00,0x50\n.byte\t0x00,0x00,0x00,0x00,0x00,0x00\n.byte\t0x00,0x00,0x00,0x00\n.LSEH_info_blst_sha256_block_data_order_epilogue:\n.byte\t1,0,4,0\n.byte\t0x00,0x74,0x01,0x00\n.byte\t0x00,0x64,0x02,0x00\n.byte\t0x00,0x00,0x00,0x00\n\n"
  },
  {
    "path": "build/elf/add_mod_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_256\n.hidden\tadd_mod_256\n.type\tadd_mod_256,%function\n.align\t5\nadd_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tadds\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tadcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tadcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\tadd_mod_256,.-add_mod_256\n\n.globl\tmul_by_3_mod_256\n.hidden\tmul_by_3_mod_256\n.type\tmul_by_3_mod_256,%function\n.align\t5\nmul_by_3_mod_256:\n\thint\t#34\n\tldp\tx12,x13,[x1]\n\tldp\tx14,x15,[x1,#16]\n\n\tadds\tx8,x12,x12\n\tldp\tx4,x5,[x2]\n\tadcs\tx9,x13,x13\n\tldp\tx6,x7,[x2,#16]\n\tadcs\tx10,x14,x14\n\tadcs\tx11,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\tadds\tx8,x8,x12\n\tadcs\tx9,x9,x13\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\tmul_by_3_mod_256,.-mul_by_3_mod_256\n\n.globl\tlshift_mod_256\n.hidden\tlshift_mod_256\n.type\tlshift_mod_256,%function\n.align\t5\nlshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n.Loop_lshift_mod_256:\n\tadds\tx8,x8,x8\n\tsub\tx2,x2,#1
\n\tadcs\tx9,x9,x9\n\tadcs\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx12,x8,x4\n\tsbcs\tx13,x9,x5\n\tsbcs\tx14,x10,x6\n\tsbcs\tx15,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x12,lo\n\tcsel\tx9,x9,x13,lo\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\n\tcbnz\tx2,.Loop_lshift_mod_256\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\tlshift_mod_256,.-lshift_mod_256\n\n.globl\trshift_mod_256\n.hidden\trshift_mod_256\n.type\trshift_mod_256,%function\n.align\t5\nrshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n.Loop_rshift:\n\tadds\tx12,x8,x4\n\tsub\tx2,x2,#1\n\tadcs\tx13,x9,x5\n\tadcs\tx14,x10,x6\n\tadcs\tx15,x11,x7\n\tadc\tx3,xzr,xzr\n\ttst\tx8,#1\n\n\tcsel\tx12,x12,x8,ne\n\tcsel\tx13,x13,x9,ne\n\tcsel\tx14,x14,x10,ne\n\tcsel\tx15,x15,x11,ne\n\tcsel\tx3,x3,xzr,ne\n\n\textr\tx8,x13,x12,#1\n\textr\tx9,x14,x13,#1\n\textr\tx10,x15,x14,#1\n\textr\tx11,x3,x15,#1\n\n\tcbnz\tx2,.Loop_rshift\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\trshift_mod_256,.-rshift_mod_256\n\n.globl\tcneg_mod_256\n.hidden\tcneg_mod_256\n.type\tcneg_mod_256,%function\n.align\t5\ncneg_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx4,x5,[x3]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx12,x4,x8\n\tldp\tx6,x7,[x3,#16]\n\torr\tx4,x8,x9\n\tsbcs\tx13,x5,x9\n\torr\tx5,x10,x11\n\tsbcs\tx14,x6,x10\n\torr\tx3,x4,x5\n\tsbc\tx15,x7,x11\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx8,x8,x12,eq\n\tcsel\tx9,x9,x13,eq\n\tcsel\tx10,x10,x14,eq\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x15,eq\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\tcneg_mod_256,.-cneg_mod_256\n\n.globl\tsub_mod_256\n.hidden\tsub_mod_256\n.type\tsub_mod_256,%function\n.align\t5\nsub_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4
,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tstp\tx8,x9,[x0]\n\tadc\tx11,x11,x7\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n.size\tsub_mod_256,.-sub_mod_256\n\n.globl\tcheck_mod_256\n.hidden\tcheck_mod_256\n.type\tcheck_mod_256,%function\n.align\t5\ncheck_mod_256:\n\tldp\tx8,x9,[x0]\n\tldp\tx10,x11,[x0,#16]\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tsubs\txzr,x8,x4\n\tsbcs\txzr,x9,x5\n\torr\tx8,x8,x9\n\tsbcs\txzr,x10,x6\n\torr\tx8,x8,x10\n\tsbcs\txzr,x11,x7\n\torr\tx8,x8,x11\n\tsbc\tx1,xzr,xzr\n\n\tcmp\tx8,#0\n\tmov\tx0,#1\n\tcsel\tx0,x0,xzr,ne\n\tand\tx0,x0,x1\n\n\tret\n.size\tcheck_mod_256,.-check_mod_256\n\n.globl\tadd_n_check_mod_256\n.hidden\tadd_n_check_mod_256\n.type\tadd_n_check_mod_256,%function\n.align\t5\nadd_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tadds\tx8,x8,x12\n\tldp\tx4,x5,[x3]\n\tadcs\tx9,x9,x13\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, 
ne\n\n\tret\n.size\tadd_n_check_mod_256,.-add_n_check_mod_256\n\n.globl\tsub_n_check_mod_256\n.hidden\tsub_n_check_mod_256\n.type\tsub_n_check_mod_256,%function\n.align\t5\nsub_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tsubs\tx8,x8,x12\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tadc\tx11,x11,x7\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n.size\tsub_n_check_mod_256,.-sub_n_check_mod_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/add_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_256\n.hidden\tadd_mod_256\n.type\tadd_mod_256,@function\n.align\t32\nadd_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loaded_a_add_mod_256:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tadd_mod_256,.-add_mod_256\n\n\n.globl\tmul_by_3_mod_256\n.hidden\tmul_by_3_mod_256\n.type\tmul_by_3_mod_256,@function\n.align\t32\nmul_by_3_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rsi,%rdx\n\tmovq\t24(%rsi),%r11\n\n\tcall\t__lshift_mod_256\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tjmp\t.Loaded_a_add_mod_256\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.c
fi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_3_mod_256,.-mul_by_3_mod_256\n\n.type\t__lshift_mod_256,@function\n.align\t32\n__lshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tmovq\t%r8,%rax\n\tadcq\t%r10,%r10\n\tmovq\t%r9,%rsi\n\tadcq\t%r11,%r11\n\tsbbq\t%r12,%r12\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tcmovcq\t%rbx,%r10\n\tcmovcq\t%rbp,%r11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__lshift_mod_256,.-__lshift_mod_256\n\n\n.globl\tlshift_mod_256\n.hidden\tlshift_mod_256\n.type\tlshift_mod_256,@function\n.align\t32\nlshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loop_lshift_mod_256:\n\tcall\t__lshift_mod_256\n\tdecl\t%edx\n\tjnz\t.Loop_lshift_mod_256\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tlshift_mod_256,.-lshift_mod_256\n\n\n.globl\trshift_mod_256\n.hidden\trshift_mod_256\n.type\trshi
ft_mod_256,@function\n.align\t32\nrshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rbp\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n.Loop_rshift_mod_256:\n\tmovq\t%rbp,%r8\n\tandq\t$1,%rbp\n\tmovq\t0(%rcx),%rax\n\tnegq\t%rbp\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\n\tandq\t%rbp,%rax\n\tandq\t%rbp,%rsi\n\tandq\t%rbp,%rbx\n\tandq\t24(%rcx),%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tadcq\t%rbx,%r10\n\tadcq\t%rbp,%r11\n\tsbbq\t%rax,%rax\n\n\tshrq\t$1,%r8\n\tmovq\t%r9,%rbp\n\tshrq\t$1,%r9\n\tmovq\t%r10,%rbx\n\tshrq\t$1,%r10\n\tmovq\t%r11,%rsi\n\tshrq\t$1,%r11\n\n\tshlq\t$63,%rbp\n\tshlq\t$63,%rbx\n\torq\t%r8,%rbp\n\tshlq\t$63,%rsi\n\torq\t%rbx,%r9\n\tshlq\t$63,%rax\n\torq\t%rsi,%r10\n\torq\t%rax,%r11\n\n\tdecl\t%edx\n\tjnz\t.Loop_rshift_mod_256\n\n\tmovq\t%rbp,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\trshift_mod_256,.-rshift_mod_256\n\n\n.globl\tcneg_mod_256\n.hidden\tcneg_mod_256\n.type\tcneg_mod_256,@function\n.align\t32\ncneg_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r12\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%r12,%r8\n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%r12\n\torq\t%r10,%r12\n\
torq\t%r11,%r12\n\tmovq\t$-1,%rbp\n\n\tmovq\t0(%rcx),%rax\n\tcmovnzq\t%rbp,%r12\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\tandq\t%r12,%rax\n\tmovq\t24(%rcx),%rbp\n\tandq\t%r12,%rsi\n\tandq\t%r12,%rbx\n\tandq\t%r12,%rbp\n\n\tsubq\t%r8,%rax\n\tsbbq\t%r9,%rsi\n\tsbbq\t%r10,%rbx\n\tsbbq\t%r11,%rbp\n\n\torq\t%rdx,%rdx\n\n\tcmovzq\t%r8,%rax\n\tcmovzq\t%r9,%rsi\n\tmovq\t%rax,0(%rdi)\n\tcmovzq\t%r10,%rbx\n\tmovq\t%rsi,8(%rdi)\n\tcmovzq\t%r11,%rbp\n\tmovq\t%rbx,16(%rdi)\n\tmovq\t%rbp,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tcneg_mod_256,.-cneg_mod_256\n\n\n.globl\tsub_mod_256\n.hidden\tsub_mod_256\n.type\tsub_mod_256,@function\n.align\t32\nsub_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0x
c3\n#endif\n.cfi_endproc\t\n.size\tsub_mod_256,.-sub_mod_256\n\n\n.globl\tcheck_mod_256\n.hidden\tcheck_mod_256\n.type\tcheck_mod_256,@function\n.align\t32\ncheck_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%rax\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\n\tmovq\t%rax,%r8\n\torq\t%r9,%rax\n\torq\t%r10,%rax\n\torq\t%r11,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t$1,%rdx\n\tcmpq\t$0,%rax\n\tcmovneq\t%rdx,%rax\n\tandq\t%rsi,%rax\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tcheck_mod_256,.-check_mod_256\n\n\n.globl\tadd_n_check_mod_256\n.hidden\tadd_n_check_mod_256\n.type\tadd_n_check_mod_256,@function\n.align\t32\nadd_n_check_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__S
GX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tadd_n_check_mod_256,.-add_n_check_mod_256\n\n\n.globl\tsub_n_check_mod_256\n.hidden\tsub_n_check_mod_256\n.type\tsub_n_check_mod_256,@function\n.align\t32\nsub_n_check_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsub_n_check_mod_256,.-sub_n_check_mod_256\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/add_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_384\n.hidden\tadd_mod_384\n.type\tadd_mod_384,%function\n.align\t5\nadd_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384,.-add_mod_384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n__add_mod_384_ab_are_loaded:\n\tadds\tx10,x10,x16\n\tadcs\tx11,x11,x17\n\tadcs\tx12,x12,x19\n\tadcs\tx13,x13,x20\n\tadcs\tx14,x14,x21\n\tadcs\tx15,x15,x22\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.globl\tadd_mod_384x\n.hidden\tadd_mod_384x\n.type\tadd_mod_384x,%function\n.align\t5\nadd_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0
,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384x,.-add_mod_384x\n\n.globl\trshift_mod_384\n.hidden\trshift_mod_384\n.type\trshift_mod_384,%function\n.align\t5\nrshift_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n.Loop_rshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__rshift_mod_384\n\tcbnz\tx2,.Loop_rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\trshift_mod_384,.-rshift_mod_384\n\n.type\t__rshift_mod_384,%function\n.align\t5\n__rshift_mod_384:\n\tsbfx\tx22,x10,#0,#1\n\tand\tx16,x22,x4\n\tand\tx17,x22,x5\n\tadds\tx10,x10,x16\n\tand\tx19,x22,x6\n\tadcs\tx11,x11,x17\n\tand\tx20,x22,x7\n\tadcs\tx12,x12,x19\n\tand\tx21,x22,x8\n\tadcs\tx13,x13,x20\n\tand\tx22,x22,x9\n\tadcs\tx14,x14,x21\n\textr\tx10,x11,x10,#1\t// a[0:5] >>= 
1\n\tadcs\tx15,x15,x22\n\textr\tx11,x12,x11,#1\n\tadc\tx22,xzr,xzr\n\textr\tx12,x13,x12,#1\n\textr\tx13,x14,x13,#1\n\textr\tx14,x15,x14,#1\n\textr\tx15,x22,x15,#1\n\tret\n.size\t__rshift_mod_384,.-__rshift_mod_384\n\n.globl\tdiv_by_2_mod_384\n.hidden\tdiv_by_2_mod_384\n.type\tdiv_by_2_mod_384,%function\n.align\t5\ndiv_by_2_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tdiv_by_2_mod_384,.-div_by_2_mod_384\n\n.globl\tlshift_mod_384\n.hidden\tlshift_mod_384\n.type\tlshift_mod_384,%function\n.align\t5\nlshift_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n.Loop_lshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__lshift_mod_384\n\tcbnz\tx2,.Loop_lshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tlshift_mod_384,.-lshift_mod_384\n\n.type\t__lshift_mod_384,%function\n.align\t5\n__lshift_mod_384:\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\
tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n.size\t__lshift_mod_384,.-__lshift_mod_384\n\n.globl\tmul_by_3_mod_384\n.hidden\tmul_by_3_mod_384\n.type\tmul_by_3_mod_384,%function\n.align\t5\nmul_by_3_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_3_mod_384,.-mul_by_3_mod_384\n\n.globl\tmul_by_8_mod_384\n.hidden\tmul_by_8_mod_384\n.type\tmul_by_8_mod_384,%function\n.align\t5\nmul_by_8_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tre
t\n.size\tmul_by_8_mod_384,.-mul_by_8_mod_384\n\n.globl\tmul_by_3_mod_384x\n.hidden\tmul_by_3_mod_384x\n.type\tmul_by_3_mod_384x,%function\n.align\t5\nmul_by_3_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1,#48]\n\tldp\tx19,x20,[x1,#64]\n\tldp\tx21,x22,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_3_mod_384x,.-mul_by_3_mod_384x\n\n.globl\tmul_by_8_mod_384x\n.hidden\tmul_by_8_mod_384x\n.type\tmul_by_8_mod_384x,%function\n.align\t5\nmul_by_8_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x
11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_8_mod_384x,.-mul_by_8_mod_384x\n\n.globl\tcneg_mod_384\n.hidden\tcneg_mod_384\n.type\tcneg_mod_384,%function\n.align\t5\ncneg_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx4,x5,[x3]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx6,x7,[x3,#16]\n\n\tsubs\tx16,x4,x10\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx8,x9,[x3,#32]\n\torr\tx3,x10,x11\n\tsbcs\tx17,x5,x11\n\torr\tx3,x3,x12\n\tsbcs\tx19,x6,x12\n\torr\tx3,x3,x13\n\tsbcs\tx20,x7,x13\n\torr\tx3,x3,x14\n\tsbcs\tx21,x8,x14\n\torr\tx3,x3,x15\n\tsbc\tx22,x9,x15\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx10,x10,x16,eq\n\tcsel\tx11,x11,x17,eq\n\tcsel\tx12,x12,x19,eq\n\tcsel\tx13,x13,x20,eq\n\tstp\tx10,x11,[x0]\n\tcsel\tx14,x14,x21,eq\n\tstp\tx12,x13,[x0,#16]\n\tcsel\tx15,x15,x22,eq\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tcneg_mod_384,.-cneg_mod_384\n\n.globl\tsub_mod_384\n.hidden\tsub_mod_384\n.type\tsub_mod_384,%function\n.align\t5\nsub_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsub_mod_384,.-sub_mod
_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n\tsubs\tx10,x10,x16\n\tsbcs\tx11,x11,x17\n\tsbcs\tx12,x12,x19\n\tsbcs\tx13,x13,x20\n\tsbcs\tx14,x14,x21\n\tsbcs\tx15,x15,x22\n\tsbc\tx3,xzr,xzr\n\n\tand\tx16,x4,x3\n\tand\tx17,x5,x3\n\tadds\tx10,x10,x16\n\tand\tx19,x6,x3\n\tadcs\tx11,x11,x17\n\tand\tx20,x7,x3\n\tadcs\tx12,x12,x19\n\tand\tx21,x8,x3\n\tadcs\tx13,x13,x20\n\tand\tx22,x9,x3\n\tadcs\tx14,x14,x21\n\tadc\tx15,x15,x22\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tsub_mod_384x\n.hidden\tsub_mod_384x\n.type\tsub_mod_384x,%function\n.align\t5\nsub_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsub_mod_384x,.-sub_mod_384x\n\n.globl\tmul_by_1_plus_i_mod_384x\n.hidden\tmul_by_1_plus_i_mod_384x\n.type\tmul_by_1_plus_i_mod_384x,%function\n.align\t5\nmul_by_1_plus_i_mod_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\tadd\tx2,x1,#48\n\n\tbl\t__sub_mod_384\t\t\t// a->re - 
a->im\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\t// a->re + a->im\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x\n\n.globl\tsgn0_pty_mod_384\n.hidden\tsgn0_pty_mod_384\n.type\tsgn0_pty_mod_384,%function\n.align\t5\nsgn0_pty_mod_384:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx0,x10,#1\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx3,x3,xzr\n\n\tmvn\tx3,x3\n\tand\tx3,x3,#2\n\torr\tx0,x0,x3\n\n\tret\n.size\tsgn0_pty_mod_384,.-sgn0_pty_mod_384\n\n.globl\tsgn0_pty_mod_384x\n.hidden\tsgn0_pty_mod_384x\n.type\tsgn0_pty_mod_384x,%function\n.align\t5\nsgn0_pty_mod_384x:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx2,x10,#1\n\torr\tx3,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx3,x3,x12\n\tadcs\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tldp\tx10,x11,[x0,#48]\n\tldp\tx1
2,x13,[x0,#64]\n\tldp\tx14,x15,[x0,#80]\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx2,x2,x16\n\n\tand\tx0,x10,#1\n\torr\tx1,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx1,x1,x12\n\tadcs\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx0,x0,x16\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t// pack sign and parity\n\n\tret\n.size\tsgn0_pty_mod_384x,.-sgn0_pty_mod_384x\n.globl\tvec_select_32\n.hidden\tvec_select_32\n.type\tvec_select_32,%function\n.align\t5\nvec_select_32:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d}, [x1]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [x2]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [x0]\n\tret\n.size\tvec_select_32,.-vec_select_32\n.globl\tvec_select_48\n.hidden\tvec_select_48\n.type\tvec_select_48,%function\n.align\t5\nvec_select_48:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n.size\tvec_select_48,.-vec_select_48\n.globl\tvec_select_96\n.hidden\tvec_select_96\n.type\tvec_select_96,%function\n.align\t5\nvec_select_96:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, 
[x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n.size\tvec_select_96,.-vec_select_96\n.globl\tvec_select_192\n.hidden\tvec_select_192\n.type\tvec_select_192,%function\n.align\t5\nvec_select_192:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n.size\tvec_select_192,.-vec_select_192\n.globl\tvec_select_144\n.hidden\tvec_select_144\n.type\tvec_select_144,%function\n.align\t5\nvec_select_144:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, 
v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n.size\tvec_select_144,.-vec_select_144\n.globl\tvec_select_288\n.hidden\tvec_select_288\n.type\tvec_select_288,%function\n.align\t5\nvec_select_288:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n.size\tvec_select_288,.-vec_select_288\n.globl\tvec_prefetch\n.hidden\tvec_prefetch\n.type\tvec_prefetch,%function\n.align\t5\nvec_prefetch:\n\thint\t#34\n\tadd\tx1, x1, x0\n\tsub\tx1, x1, #1\n\tmov\tx2, #64\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, 
x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tprfm\tpldl1keep, [x0]\n\tret\n.size\tvec_prefetch,.-vec_prefetch\n.globl\tvec_is_zero_16x\n.hidden\tvec_is_zero_16x\n.type\tvec_is_zero_16x,%function\n.align\t5\nvec_is_zero_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tlsr\tx1, x1, #4\n\tsub\tx1, x1, #1\n\tcbz\tx1, .Loop_is_zero_done\n\n.Loop_is_zero:\n\tld1\t{v1.2d}, [x0], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\tx1, x1, #1\n\tcbnz\tx1, .Loop_is_zero\n\n.Loop_is_zero_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_zero_16x,.-vec_is_zero_16x\n.globl\tvec_is_equal_16x\n.hidden\tvec_is_equal_16x\n.type\tvec_is_equal_16x,%function\n.align\t5\nvec_is_equal_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tld1\t{v1.2d}, [x1], #16\n\tlsr\tx2, x2, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\n.Loop_is_equal:\n\tsub\tx2, x2, #1\n\tcbz\tx2, .Loop_is_equal_done\n\tld1\t{v1.2d}, [x0], #16\n\tld1\t{v2.2d}, [x1], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\t.Loop_is_equal\n\tnop\n\n.Loop_is_equal_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_equal_16x,.-vec_is_equal_16x\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || 
defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/add_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_384\n.hidden\tadd_mod_384\n.type\tadd_mod_384,@function\n.align\t32\nadd_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__add_mod_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tadd_mod_384,.-add_mod_384\n\n.type\t__add_mod_384,@function\n.align\t32\n__add_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__add_mod_384_a_is_loaded:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r
12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__add_mod_384,.-__add_mod_384\n\n.globl\tadd_mod_384x\n.hidden\tadd_mod_384x\n.type\tadd_mod_384x,@function\n.align\t32\nadd_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t24\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t24+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t24+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t24+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tadd_mod_384x,.-add_mod_384x\n\n\n.globl\trshift_mod_384\n.hidden\trshift_mod_384\n.type\trshift_mod_384,@function\n.align\t32\nrshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adju
st_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n.Loop_rshift_mod_384:\n\tcall\t__rshift_mod_384\n\tdecl\t%edx\n\tjnz\t.Loop_rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\trshift_mod_384,.-rshift_mod_384\n\n.type\t__rshift_mod_384,@function\n.align\t32\n__rshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rsi\n\tmovq\t0(%rcx),%r14\n\tandq\t%r8,%rsi\n\tmovq\t8(%rcx),%r15\n\tnegq\t%rsi\n\tmovq\t16(%rcx),%rax\n\tandq\t%rsi,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rsi,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%rbx\n\tadcq\t%r12,%rbp\n\tadcq\t%r13,%rsi\n\tsbbq\t%r13,%r13\n\n\tshrq\t$1,%r14\n\tmovq\t%r15,%r8\n\tshrq\t$1,%r15\n\tmovq\t%rax,%r9\n\tshrq\t$1,%rax\n\tmovq\t%rbx,%r10\n\tshrq\t$1,%rbx\n\tmovq\t%rbp,%r11\n\tshrq\t$1,%rbp\n\tmovq\t%rsi,%r12\n\tshrq\t$1,%rsi\n\tshlq\t$63,%r8\n\tshlq\t$63,%r9\n\torq\t%r14,%r8\n\tshlq\t$63,%r10\n\torq\t%r15,%r9\n\tshlq\t$63,%r11\n\torq\t%rax,%r10\n\tshlq\t$63,%r12\n\torq\t%rbx,%r11\n\
tshlq\t$63,%r13\n\torq\t%rbp,%r12\n\torq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r14\n\tlfence\n\tjmpq\t*%r14\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__rshift_mod_384,.-__rshift_mod_384\n\n.globl\tdiv_by_2_mod_384\n.hidden\tdiv_by_2_mod_384\n.type\tdiv_by_2_mod_384,@function\n.align\t32\ndiv_by_2_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rdx,%rcx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tcall\t__rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tdiv_by_2_mod_384,.-div_by_2_mod_384\n\n\n.globl\tlshift_mod_384\n.hidden\tlshift_mod_384\n.type\tlshift_mod_384,@function\n.align\t32\nlshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\
n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n.Loop_lshift_mod_384:\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdi,%rdi\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdi\n\n\tmovq\t(%rsp),%rdi\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\tdecl\t%edx\n\tjnz\t.Loop_lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tlshift_mod_384,.-lshift_mod_384\n\n.type\t__lshift_mod_384,@function\n.align\t32\n__lshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11
,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__lshift_mod_384,.-__lshift_mod_384\n\n\n.globl\tmul_by_3_mod_384\n.hidden\tmul_by_3_mod_384\n.type\tmul_by_3_mod_384,@function\n.align\t32\nmul_by_3_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_3_mod_384,.-mul_by_3_mod_384\n\n.globl\tmul_by_8_mod_384\n.hidden\tmul_by_8_
mod_384\n.type\tmul_by_8_mod_384,@function\n.align\t32\nmul_by_8_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_8_mod_384,.-mul_by_8_mod_384\n\n\n.globl\tmul_by_3_mod_384x\n.hidden\tmul_by_3_mod_384x\n.type\tmul_by_3_mod_384x,@function\n.align\t32\nmul_by_3_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_c
fa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rdi),%rdi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%r8\n\tmovq\t56(%rsi),%r9\n\tmovq\t64(%rsi),%r10\n\tmovq\t72(%rsi),%r11\n\tmovq\t80(%rsi),%r12\n\tmovq\t88(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t$48,%rdx\n\taddq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_3_mod_384x,.-mul_by_3_mod_384x\n\n.globl\tmul_by_8_mod_384x\n.hidden\tmul_by_8_mod_384x\n.type\tmul_by_8_mod_384x,@function\n.align\t32\nmul_by_8_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\
tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rsi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,48+0(%rdi)\n\tmovq\t%r9,48+8(%rdi)\n\tmovq\t%r10,48+16(%rdi)\n\tmovq\t%r11,48+24(%rdi)\n\tmovq\t%r12,48+32(%rdi)\n\tmovq\t%r13,48+40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_8_mod_384x,.-mul_by_8_mod_384x\n\n\n.globl\tcneg_mod_384\n.hidden\tcneg_mod_384\n.type\tcneg_mod_384,@function\n.align\t32\ncneg_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdx\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rdx,%r8\
n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%rdx\n\tmovq\t32(%rsi),%r12\n\torq\t%r10,%rdx\n\tmovq\t40(%rsi),%r13\n\torq\t%r11,%rdx\n\tmovq\t$-1,%rsi\n\torq\t%r12,%rdx\n\torq\t%r13,%rdx\n\n\tmovq\t0(%rcx),%r14\n\tcmovnzq\t%rsi,%rdx\n\tmovq\t8(%rcx),%r15\n\tmovq\t16(%rcx),%rax\n\tandq\t%rdx,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rdx,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rdx,%rax\n\tmovq\t40(%rcx),%rsi\n\tandq\t%rdx,%rbx\n\tmovq\t0(%rsp),%rcx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\tsubq\t%r8,%r14\n\tsbbq\t%r9,%r15\n\tsbbq\t%r10,%rax\n\tsbbq\t%r11,%rbx\n\tsbbq\t%r12,%rbp\n\tsbbq\t%r13,%rsi\n\n\torq\t%rcx,%rcx\n\n\tcmovzq\t%r8,%r14\n\tcmovzq\t%r9,%r15\n\tcmovzq\t%r10,%rax\n\tmovq\t%r14,0(%rdi)\n\tcmovzq\t%r11,%rbx\n\tmovq\t%r15,8(%rdi)\n\tcmovzq\t%r12,%rbp\n\tmovq\t%rax,16(%rdi)\n\tcmovzq\t%r13,%rsi\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rsi,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tcneg_mod_384,.-cneg_mod_384\n\n\n.globl\tsub_mod_384\n.hidden\tsub_mod_384\n.type\tsub_mod_384,@function\n.align\t32\nsub_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__sub_mod_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore
\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsub_mod_384,.-sub_mod_384\n\n.type\t__sub_mod_384,@function\n.align\t32\n__sub_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tsub_mod_384x\n.hidden\tsub_mod_384x\n.type\tsub_mod_384x,@function\n.align\t32\nsub_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adju
st_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t24\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t24+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t24+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t24+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsub_mod_384x,.-sub_mod_384x\n.globl\tmul_by_1_plus_i_mod_384x\n.hidden\tmul_by_1_plus_i_mod_384x\n.type\tmul_by_1_plus_i_mod_384x,@function\n.align\t32\nmul_by_1_plus_i_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$56,%rsp\n.cfi_adjust_cfa_offset\t56\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rbx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rcx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\tmovq\t%rdi,48(%rsp)\n\tsbbq\t%rdi,
%rdi\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rbx\n\tsbbq\t80(%rsi),%rcx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t0(%rdx),%r8\n\tmovq\t%r9,8(%rsp)\n\tmovq\t8(%rdx),%r9\n\tmovq\t%r10,16(%rsp)\n\tmovq\t16(%rdx),%r10\n\tmovq\t%r11,24(%rsp)\n\tmovq\t24(%rdx),%r11\n\tmovq\t%r12,32(%rsp)\n\tandq\t%rsi,%r8\n\tmovq\t32(%rdx),%r12\n\tmovq\t%r13,40(%rsp)\n\tandq\t%rsi,%r9\n\tmovq\t40(%rdx),%r13\n\tandq\t%rsi,%r10\n\tandq\t%rsi,%r11\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tmovq\t48(%rsp),%rsi\n\n\taddq\t%r8,%r14\n\tmovq\t0(%rsp),%r8\n\tadcq\t%r9,%r15\n\tmovq\t8(%rsp),%r9\n\tadcq\t%r10,%rax\n\tmovq\t16(%rsp),%r10\n\tadcq\t%r11,%rbx\n\tmovq\t24(%rsp),%r11\n\tadcq\t%r12,%rcx\n\tmovq\t32(%rsp),%r12\n\tadcq\t%r13,%rbp\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t%r14,0(%rsi)\n\tmovq\t%r8,%r14\n\tmovq\t%r15,8(%rsi)\n\tmovq\t%rax,16(%rsi)\n\tmovq\t%r9,%r15\n\tmovq\t%rbx,24(%rsi)\n\tmovq\t%rcx,32(%rsi)\n\tmovq\t%r10,%rax\n\tmovq\t%rbp,40(%rsi)\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t%r11,%rbx\n\tsbbq\t8(%rdx),%r9\n\tsbbq\t16(%rdx),%r10\n\tmovq\t%r12,%rcx\n\tsbbq\t24(%rdx),%r11\n\tsbbq\t32(%rdx),%r12\n\tmovq\t%r13,%rbp\n\tsbbq\t40(%rdx),%r13\n\tsbbq\t$0,%rdi\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,48(%rsi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,56(%rsi)\n\tcmovcq\t%rcx,%r12\n\tmovq\t%r10,64(%rsi)\n\tcmovcq\t%rbp,%r13\n\tmovq\t%r11,72(%rsi)\n\tmovq\t%r12,80(%rsi)\n\tmovq\t%r13,88(%rsi)\n\n\tmovq\t56+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t56+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t56+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t56+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_by_1_plus_i_mod_384x,.-mul_by_1_plus_
i_mod_384x\n.globl\tsgn0_pty_mod_384\n.hidden\tsgn0_pty_mod_384\n.type\tsgn0_pty_mod_384,@function\n.align\t32\nsgn0_pty_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%rcx\n\tmovq\t40(%rdi),%rdx\n\n\txorq\t%rax,%rax\n\tmovq\t%r8,%rdi\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%rdi\n\tandq\t$2,%rax\n\torq\t%rdi,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0_pty_mod_384,.-sgn0_pty_mod_384\n\n.globl\tsgn0_pty_mod_384x\n.hidden\tsgn0_pty_mod_384x\n.type\tsgn0_pty_mod_384x,@function\n.align\t32\nsgn0_pty_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rdi),%r8\n\tmovq\t56(%rdi),%r9\n\tmovq\t64(%rdi),%r10\n\tmovq\t72(%rdi),%r11\n\tmovq\t80(%rdi),%rcx\n\tmovq\t88(%rdi),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\tleaq\t0(%rdi),%rax\n\txorq\t%rdi,%rdi\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r8,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%rbp\n\tandq\t$2,%rdi\n\torq\t%r
bp,%rdi\n\n\tmovq\t0(%rax),%r8\n\tmovq\t8(%rax),%r9\n\tmovq\t16(%rax),%r10\n\tmovq\t24(%rax),%r11\n\tmovq\t32(%rax),%rcx\n\tmovq\t40(%rax),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\txorq\t%rax,%rax\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%rbx\n\n\tnotq\t%rax\n\n\ttestq\t%r8,%r8\n\tcmovzq\t%rdi,%rbp\n\n\ttestq\t%rbx,%rbx\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%rbp\n\tandq\t$2,%rax\n\torq\t%rbp,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0_pty_mod_384x,.-sgn0_pty_mod_384x\n.globl\tvec_select_32\n.hidden\tvec_select_32\n.type\tvec_select_32,@function\n.align\t32\nvec_select_32:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t16(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t16(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t16(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-16(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-16(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-16(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-16(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_32,.-vec_select_32\n.globl\tvec_select_48\n.hidden\tvec_select_48\n.type\tvec_select_48,@function\n.ali
gn\t32\nvec_select_48:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t24(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t24(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t24(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-24(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-24(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-24(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-24(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-24(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-24(%rdi)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_48,.-vec_select_48\n.globl\tvec_select_96\n.hidden\tvec_select_96\n.type\tvec_select_96,@function\n.align\t32\nvec_select_96:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t48(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t48(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t48(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-48(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-48(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-48(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-48(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-48(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-48(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdq
u\t64+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-48(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_96,.-vec_select_96\n.globl\tvec_select_192\n.hidden\tvec_select_192\n.type\tvec_select_192,@function\n.align\t32\nvec_select_192:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t96(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t96(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t96(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-
96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-96(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_192,.-vec_select_192\n.globl\tvec_select_144\n.hidden\tvec_select_144\n.type\tvec_select_144,@function\n.align\t32\nvec_select_144:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t72(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t72(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t72(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0
,64-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-72(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_144,.-vec_select_144\n.globl\tvec_select_288\n.hidden\tvec_select_288\n.type\tvec_select_288,@function\n.align\t32\nvec_select_288:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t144(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t144(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t144(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmov
dqu\t80+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t176+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t176+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t192+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t192+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,192-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t208+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t208+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,208-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t224+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t224+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,224-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t240+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t240+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,240-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t256+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t256+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,256-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,272-144(%rdi)\n\t\n#if
def\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_select_288,.-vec_select_288\n.globl\tvec_prefetch\n.hidden\tvec_prefetch\n.type\tvec_prefetch,@function\n.align\t32\nvec_prefetch:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tleaq\t-1(%rdi,%rsi,1),%rsi\n\tmovq\t$64,%rax\n\txorq\t%r8,%r8\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tprefetchnta\t(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_prefetch,.-vec_prefetch\n.globl\tvec_is_zero_16x\n.hidden\tvec_is_zero_16x\n.type\tvec_is_zero_16x,@function\n.align\t32\nvec_is_zero_16x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%esi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdi),%xmm0\n\tleaq\t16(%rdi),%rdi\n\n.Loop_is_zero:\n\tdecl\t%esi\n\tjz\t.Loop_is_zero_done\n\tmovdqu\t(%rdi),%xmm1\n\tleaq\t16(%rdi),%rdi\n\tpor\t%xmm1,%xmm0\n\tjmp\t.Loop_is_zero\n\n.Loop_is_zero_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%esi\n\ttestq\t%rax,%rax\n\tcmovnzl\t%esi,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_is
_zero_16x,.-vec_is_zero_16x\n.globl\tvec_is_equal_16x\n.hidden\tvec_is_equal_16x\n.type\tvec_is_equal_16x,@function\n.align\t32\nvec_is_equal_16x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%edx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdi),%xmm0\n\tmovdqu\t(%rsi),%xmm1\n\tsubq\t%rdi,%rsi\n\tleaq\t16(%rdi),%rdi\n\tpxor\t%xmm1,%xmm0\n\n.Loop_is_equal:\n\tdecl\t%edx\n\tjz\t.Loop_is_equal_done\n\tmovdqu\t(%rdi),%xmm1\n\tmovdqu\t(%rdi,%rsi,1),%xmm2\n\tleaq\t16(%rdi),%rdi\n\tpxor\t%xmm2,%xmm1\n\tpor\t%xmm1,%xmm0\n\tjmp\t.Loop_is_equal\n\n.Loop_is_equal_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%edx\n\ttestq\t%rax,%rax\n\tcmovnzl\t%edx,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tvec_is_equal_16x,.-vec_is_equal_16x\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/add_mod_384x384-x86_64.s",
    "content": ".text\t\n\n.globl\tadd_mod_384x384\n.hidden\tadd_mod_384x384\n.type\tadd_mod_384x384,@function\n.align\t32\nadd_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\taddq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tadcq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tadcq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tadcq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tadcq\t48(%rdx),%r14\n\tmovq\t%r9,8(%rdi)\n\tadcq\t56(%rdx),%r15\n\tmovq\t%r10,16(%rdi)\n\tadcq\t64(%rdx),%rax\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r14,%r8\n\tadcq\t72(%rdx),%rbx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r15,%r9\n\tadcq\t80(%rdx),%rbp\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rax,%r10\n\tadcq\t88(%rdx),%rsi\n\tmovq\t%rbx,%r11\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rbp,%r12\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%rbx\n\tsbbq\t32(%rcx),%rbp\n\tmovq\t%rsi,%r13\n\tsbbq\t40(%rcx),%rsi\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r8,%r14\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%r11,%rbx\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%r12,%rbp\n\tmovq\t%rax,64(%rdi)\n\tcmovcq\t%r13,%rsi\n\tmovq\t%rbx,72(%rdi)\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmo
vq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tadd_mod_384x384,.-add_mod_384x384\n\n.globl\tsub_mod_384x384\n.hidden\tsub_mod_384x384\n.type\tsub_mod_384x384,@function\n.align\t32\nsub_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\
taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsub_mod_384x384,.-sub_mod_384x384\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ct_inverse_mod_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_inverse_mod_256\n.hidden\tct_inverse_mod_256\n.type\tct_inverse_mod_256, %function\n.align\t5\nct_inverse_mod_256:\n\thint\t#PACI_HINT\n\tstp\tx29, x30, [sp,#-10*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1040\n\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx6, x7, [x1,#8*2]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#16+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #16+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstr\tx0, [sp]\t\t// offload out_ptr\n\n\tldp\tx8, x9, [x2,#8*0]\n\tldp\tx10, x11, [x2,#8*2]\n\n\tstp\tx4, x5, [x1,#8*0]\t// copy input to |a|\n\tstp\tx6, x7, [x1,#8*2]\n\tstp\tx8, x9, [x1,#8*4]\t// copy modulus to |b|\n\tstp\tx10, x11, [x1,#8*6]\n\n\t////////////////////////////////////////// first iteration\n\tbl\t.Lab_approximation_31_256_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12,[x0,#8*8]\t\t// initialize |u| with |f0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12, [x0,#8*10]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, 
x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\tx8, [x1,#8*8]\t\t// |u|\n\tldr\tx9, [x1,#8*14]\t// |v|\n\tmadd\tx4, x16, x8, xzr\t// |u|*|f0|\n\tmadd\tx4, x17, x9, x4\t// |v|*|g0|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*4]\n\tstp\tx5, x5, [x0,#8*6]\n\n\tmadd\tx4, x12, x8, xzr\t// |u|*|f1|\n\tmadd\tx4, x13, x9, x4\t// |v|*|g1|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*10]\n\tstp\tx5, x5, [x0,#8*12]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstr\tx24, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\t\t// sign extension\n\tstp\tx24, x24, [x0,#8*4]\n\tstp\tx24, x24, [x0,#8*6]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// 
|g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected 
|g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\t////////////////////////////////////////// two[!] last iterations\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #47\t\t\t// 31 + 512 % 31\n\t//bl\t__ab_approximation_62_256\t// |a| and |b| are exact,\n\tldr\tx7, [x1,#8*0]\t\t// just load\n\tldr\tx11, [x1,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\tx16, x14\n\tmov\tx17, x15\n\tldr\tx0, [sp]\t\t\t// original out_ptr\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx20, x7, x17\t\t// figure out top-most limb\n\tldp\tx8, x9, [x3,#8*0]\n\tadc\tx23, x23, x25\n\tldp\tx10, x11, [x3,#8*2]\n\n\tadd\tx20, x20, x23\t\t// x20 is 1, 0 or -1\n\tasr\tx19, x20, #63\t\t// sign as mask\n\n\tand\tx23,   x8, x19\t\t// add mod<<256 conditionally\n\tand\tx24,   x9, x19\n\tadds\tx4, x4, x23\n\tand\tx25,   x10, x19\n\tadcs\tx5, x5, x24\n\tand\tx26,   x11, x19\n\tadcs\tx6, x6, x25\n\tadcs\tx7, x22,   x26\n\tadc\tx20, x20, xzr\t\t// x20 is 1, 0 or -1\n\n\tneg\tx19, x20\n\torr\tx20, x20, x19\t\t// excess bit or sign as mask\n\tasr\tx19, x19, #63\t\t// excess bit as mask\n\n\tand\tx8, x8, x20\t\t// mask |mod|\n\tand\tx9, x9, x20\n\tand\tx10, x10, x20\n\tand\tx11, x11, x20\n\n\teor\tx8, x8, x19\t\t// conditionally negate |mod|\n\teor\tx9, x9, x19\n\tadds\tx8, x8, x19, lsr#63\n\teor\tx10, x10, x19\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x19\n\tadcs\tx10, x10, xzr\n\tadc\tx11, x11, xzr\n\n\tadds\tx4, x4, x8\t// final adjustment for |mod|<<256\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*4]\n\tadc\tx7, x7, x11\n\tstp\tx6, x7, [x0,#8*6]\n\n\tadd\tsp, sp, #1040\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldr\tx29, 
[sp],#10*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_inverse_mod_256,.-ct_inverse_mod_256\n\n////////////////////////////////////////////////////////////////////////\n.type\t__smul_256x63, %function\n.align\t5\n__smul_256x63:\n\tldp\tx4, x5, [x1,#8*0+64]\t// load |u| (or |v|)\n\tasr\tx14, x16, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx6, x7, [x1,#8*2+64]\n\teor\tx16, x16, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx22, [x1,#8*4+64]\n\n\teor\tx4, x4, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx16, x16, x14\n\teor\tx5, x5, x14\n\tadds\tx4, x4, x14, lsr#63\n\teor\tx6, x6, x14\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\teor\tx22, x22, x14\n\tumulh\tx19, x4, x16\n\tadcs\tx7, x7, xzr\n\tumulh\tx20, x5, x16\n\tadcs\tx22, x22, xzr\n\tumulh\tx21, x6, x16\n\tmul\tx4, x4, x16\n\tcmp\tx16, #0\n\tmul\tx5, x5, x16\n\tcsel\tx22, x22, xzr, ne\n\tmul\tx6, x6, x16\n\tadds\tx5, x5, x19\n\tmul\tx24, x7, x16\n\tadcs\tx6, x6, x20\n\tadcs\tx24, x24, x21\n\tadc\tx26, xzr, xzr\n\tldp\tx8, x9, [x1,#8*0+112]\t// load |u| (or |v|)\n\tasr\tx14, x17, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx10, x11, [x1,#8*2+112]\n\teor\tx17, x17, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx23, [x1,#8*4+112]\n\n\teor\tx8, x8, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx17, x17, x14\n\teor\tx9, x9, x14\n\tadds\tx8, x8, x14, lsr#63\n\teor\tx10, x10, x14\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x14\n\tadcs\tx10, x10, xzr\n\teor\tx23, x23, x14\n\tumulh\tx19, x8, x17\n\tadcs\tx11, x11, xzr\n\tumulh\tx20, x9, x17\n\tadcs\tx23, x23, xzr\n\tumulh\tx21, x10, x17\n\tadc\tx15, xzr, xzr\t\t// used in __smul_512x63_tail\n\tmul\tx8, x8, x17\n\tcmp\tx17, #0\n\tmul\tx9, x9, x17\n\tcsel\tx23, x23, xzr, ne\n\tmul\tx10, x10, x17\n\tadds\tx9, x9, x19\n\tmul\tx25, x11, x17\n\tadcs\tx10, x10, x20\n\tadcs\tx25, x25, x21\n\tadc\tx26, x26, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*0]\n\tadcs\tx24,   
x24,   x25\n\tstp\tx6, x24, [x0,#8*2]\n\n\tret\n.size\t__smul_256x63,.-__smul_256x63\n\n.type\t__smul_512x63_tail, %function\n.align\t5\n__smul_512x63_tail:\n\tumulh\tx24, x7, x16\n\tldr\tx5, [x1,#8*19]\t// load rest of |v|\n\tadc\tx26, x26, xzr\n\tldp\tx6, x7, [x1,#8*20]\n\tand\tx22, x22, x16\n\n\tumulh\tx11, x11, x17\t// resume |v|*|g1| chain\n\n\tsub\tx24, x24, x22\t// tie up |u|*|f1| chain\n\tasr\tx25, x24, #63\n\n\teor\tx5, x5, x14\t// conditionally negate rest of |v|\n\teor\tx6, x6, x14\n\tadds\tx5, x5, x15\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\tumulh\tx19, x23,   x17\n\tadc\tx7, x7, xzr\n\tumulh\tx20, x5, x17\n\tadd\tx11, x11, x26\n\tumulh\tx21, x6, x17\n\n\tmul\tx4, x23,   x17\n\tmul\tx5, x5, x17\n\tadds\tx4, x4, x11\n\tmul\tx6, x6, x17\n\tadcs\tx5, x5, x19\n\tmul\tx22,   x7, x17\n\tadcs\tx6, x6, x20\n\tadcs\tx22,   x22,   x21\n\tadc\tx23, xzr, xzr\t\t// used in the final step\n\n\tadds\tx4, x4, x24\n\tadcs\tx5, x5, x25\n\tadcs\tx6, x6, x25\n\tstp\tx4, x5, [x0,#8*4]\n\tadcs\tx22,   x22,   x25\t// carry is used in the final step\n\tstp\tx6, x22,   [x0,#8*6]\n\n\tret\n.size\t__smul_512x63_tail,.-__smul_512x63_tail\n\n.type\t__smul_256_n_shift_by_31, %function\n.align\t5\n__smul_256_n_shift_by_31:\n\tldp\tx4, x5, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx24, x12, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx6, x7, [x1,#8*2+0]\n\teor\tx25, x12, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx4, x4, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx5, x5, x24\n\tadds\tx4, x4, x24, lsr#63\n\teor\tx6, x6, x24\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x24\n\tumulh\tx19, x4, x25\n\tadcs\tx6, x6, xzr\n\tumulh\tx20, x5, x25\n\tadc\tx7, x7, xzr\n\tumulh\tx21, x6, x25\n\tand\tx24, x24, x25\n\tumulh\tx22, x7, x25\n\tneg\tx24, x24\n\n\tmul\tx4, x4, x25\n\tmul\tx5, x5, x25\n\tmul\tx6, x6, x25\n\tadds\tx5, x5, x19\n\tmul\tx7, x7, x25\n\tadcs\tx6, x6, x20\n\tadcs\tx7, x7, x21\n\tadc\tx22, x22, x24\n\tldp\tx8, x9, [x1,#8*0+32]\t// load |a| 
(or |b|)\n\tasr\tx24, x13, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx10, x11, [x1,#8*2+32]\n\teor\tx25, x13, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx8, x8, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx9, x9, x24\n\tadds\tx8, x8, x24, lsr#63\n\teor\tx10, x10, x24\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x24\n\tumulh\tx19, x8, x25\n\tadcs\tx10, x10, xzr\n\tumulh\tx20, x9, x25\n\tadc\tx11, x11, xzr\n\tumulh\tx21, x10, x25\n\tand\tx24, x24, x25\n\tumulh\tx23, x11, x25\n\tneg\tx24, x24\n\n\tmul\tx8, x8, x25\n\tmul\tx9, x9, x25\n\tmul\tx10, x10, x25\n\tadds\tx9, x9, x19\n\tmul\tx11, x11, x25\n\tadcs\tx10, x10, x20\n\tadcs\tx11, x11, x21\n\tadc\tx23, x23, x24\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tadcs\tx7, x7, x11\n\tadc\tx8, x22,   x23\n\n\textr\tx4, x5, x4, #31\n\textr\tx5, x6, x5, #31\n\textr\tx6, x7, x6, #31\n\tasr\tx23, x8, #63\t// result's sign as mask\n\textr\tx7, x8, x7, #31\n\n\teor\tx4, x4, x23\t// ensure the result is positive\n\teor\tx5, x5, x23\n\tadds\tx4, x4, x23, lsr#63\n\teor\tx6, x6, x23\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x23\n\tadcs\tx6, x6, xzr\n\tstp\tx4, x5, [x0,#8*0]\n\tadc\tx7, x7, xzr\n\tstp\tx6, x7, [x0,#8*2]\n\n\teor\tx12, x12, x23\t\t// adjust |f/g| accordingly\n\teor\tx13, x13, x23\n\tsub\tx12, x12, x23\n\tsub\tx13, x13, x23\n\n\tret\n.size\t__smul_256_n_shift_by_31,.-__smul_256_n_shift_by_31\n.type\t__ab_approximation_31_256, %function\n.align\t4\n__ab_approximation_31_256:\n\tldp\tx6, x7, [x1,#8*2]\n\tldp\tx10, x11, [x1,#8*6]\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx8, x9, [x1,#8*4]\n\n.Lab_approximation_31_256_loaded:\n\torr\tx19, x7, x11\t// check top-most limbs, ...\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x5, ne\n\torr\tx19, x7, x11\t// and ones before top-most, ...\n\tcsel\tx10, x10, x9, ne\n\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x4, ne\n\torr\tx19, x7, x11\t// and one more, 
...\n\tcsel\tx10, x10, x8, ne\n\n\tclz\tx19, x19\n\tcmp\tx19, #64\n\tcsel\tx19, x19, xzr, ne\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tneg\tx20, x19\n\n\tlslv\tx7, x7, x19\t// align high limbs to the left\n\tlslv\tx11, x11, x19\n\tlsrv\tx6, x6, x20\n\tlsrv\tx10, x10, x20\n\tand\tx6, x6, x20, asr#6\n\tand\tx10, x10, x20, asr#6\n\torr\tx7, x7, x6\n\torr\tx11, x11, x10\n\n\tbfxil\tx7, x4, #0, #31\n\tbfxil\tx11, x8, #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n.size\t__ab_approximation_31_256,.-__ab_approximation_31_256\n\n.type\t__inner_loop_31_256, %function\n.align\t4\n__inner_loop_31_256:\n\tmov\tx2, #31\n\tmov\tx13, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx15, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx23,#0x7FFFFFFF7FFFFFFF\n\n.Loop_31_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x15\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx15, x15, x13,    hs\t// exchange |fg0| and |fg1|\n\tcsel\tx13, x13, x19,   hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x15, x22\n\tand\tx20, x23, x22\n\tsub\tx13, x13, x19\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx15, x15, x15\t// |f1|<<=1\n\tadd\tx13, x13, x20\n\tsub\tx15, x15, x23\n\tcbnz\tx2, .Loop_31_256\n\n\tmov\tx23, #0x7FFFFFFF\n\tubfx\tx12, x13, #0, #32\n\tubfx\tx13, x13, #32, #32\n\tubfx\tx14, x15, #0, #32\n\tubfx\tx15, x15, #32, #32\n\tsub\tx12, x12, x23\t\t// remove bias\n\tsub\tx13, x13, x23\n\tsub\tx14, x14, x23\n\tsub\tx15, x15, x23\n\n\tret\n.size\t__inner_loop_31_256,.-__inner_loop_31_256\n\n.type\t__inner_loop_62_256, %function\n.align\t4\n__inner_loop_62_256:\n\tmov\tx12, #1\t\t// |f0|=1\n\tmov\tx13, #0\t\t// |g0|=0\n\tmov\tx14, #0\t\t// |f1|=0\n\tmov\tx15, #1\t\t// |g1|=1\n\n.Loop_62_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is 
odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x12\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tmov\tx20, x13\n\tcsel\tx12, x12, x14,       hs\t// exchange |f0| and |f1|\n\tcsel\tx14, x14, x19,     hs\n\tcsel\tx13, x13, x15,       hs\t// exchange |g0| and |g1|\n\tcsel\tx15, x15, x20,     hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x14, x22\n\tand\tx20, x15, x22\n\tadd\tx14, x14, x14\t\t// |f1|<<=1\n\tadd\tx15, x15, x15\t\t// |g1|<<=1\n\tsub\tx12, x12, x19\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx13, x13, x20\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62_256\n\n\tret\n.size\t__inner_loop_62_256,.-__inner_loop_62_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ct_inverse_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\tct_inverse_mod_256\n.hidden\tct_inverse_mod_256\n.type\tct_inverse_mod_256,@function\n.align\t32\nct_inverse_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1072,%rsp\n.cfi_adjust_cfa_offset\t1072\n\n\n\tleaq\t48+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tmovq\t0(%rdx),%r12\n\tmovq\t8(%rdx),%r13\n\tmovq\t16(%rdx),%r14\n\tmovq\t24(%rdx),%r15\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rax,%rsi\n\n\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,64(%rdi)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,72(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\n\tmovq\t64(%rsi),%r8\n\tmovq\t104(%rsi),%r12\n\tmovq\t%r8,%r9\n\timulq\t0(%rsp),%r8\n\tmovq\t%r12,%r13\n\timulq\t8(%rsp),%r12\
n\taddq\t%r12,%r8\n\tmovq\t%r8,32(%rdi)\n\tsarq\t$63,%r8\n\tmovq\t%r8,40(%rdi)\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r8,56(%rdi)\n\tmovq\t%r8,64(%rdi)\n\tleaq\t64(%rsi),%rsi\n\n\timulq\t%rdx,%r9\n\timulq\t%rcx,%r13\n\taddq\t%r13,%r9\n\tmovq\t%r9,72(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tmovq\t%r9,104(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(
%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\t
xorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\tsarq\t$63,%rbp\n\tmovq\t%rbp,40(%rdi)\n\tmovq\t%rbp,48(%rdi)\n\tmovq\t%rbp,56(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16
(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmo
vq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\n\txorq\t$256+64,%rsi\n\tmovl\t$47,%edx\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t32(%rsi),%r10\n\n\tcall\t__inner_loop_62_256\n\n\n\n\n\n\n\n\tleaq\t64(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_512x63\n\tadcq\t%rbp,%rdx\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rdx\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%rax\n\n\tmovq\t%rax,%rdx\n\tnegq\t%rax\n\torq\t%rax,%rdx\n\tsarq\t$63,%rax\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rcx,%rcx\n\txorq\t%rax,%r9\n\tsubq\t%rax,%rcx\n\txorq\t%rax,%r10\n\txorq\t%rax,%rdx\n\taddq\t%rcx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\tleaq\t1072(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1072-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tct_
inverse_mod_256,.-ct_inverse_mod_256\n.type\t__smulq_512x63,@function\n.align\t32\n__smulq_512x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t40(%rsi),%r8\n\tmovq\t48(%rsi),%r9\n\tmovq\t56(%rsi),%r10\n\tmovq\t64(%rsi),%r11\n\tmovq\t72(%rsi),%r12\n\tmovq\t80(%rsi),%r13\n\tmovq\t88(%rsi),%r14\n\tmovq\t96(%rsi),%r15\n\n\tmovq\t%rcx,%rdx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rcx\n\taddq\t%rax,%rcx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\n\tmulq\t%rcx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rcx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rcx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rcx\n\taddq\t%rax,%r13\n
\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rcx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\timulq\t%rcx\n\taddq\t%rax,%r15\n\tadcq\t$0,%rdx\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t%rbx,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t%rbp,%r14\n\tadcq\t%rbp,%r15\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_512x63,.-__smulq_512x63\n\n.type\t__smulq_256x63,@function\n.align\t32\n__smulq_256x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%rcx,%rdx\n\tmovq\t40+0(%rsi),%r12\n\tmovq\t40+8(%rsi),%r13\n\tmovq\t40+16(%rsi),%r14\n\tmovq\t40+24(%rsi),%r15\n\tmovq\t40+32(%rsi),%rcx\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rcx\n\taddq\t%r12,%rax\n\tadcq\t$
0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rcx\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tmovq\t%rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_256x63,.-__smulq_256x63\n.type\t__smulq_256_n_shift_by_31,@function\n.align\t32\n__smulq_256_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rbp\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t32+0(%rsi),%r12\n\tmovq\t32+8(%rsi),%r13\n\tmovq\t32+16(%rsi),%r14\n\tmovq\t32+24(%rsi),%r15\n\n\tmovq\t%rcx,%rbx\n\tsarq\t$63,%rcx\n\txorq\t%rax,%rax\n\tsubq\t%rcx,%rax\n\n\txorq\t%rcx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rcx,%r12\n\txorq\t%rcx,%r13\n\txorq\t%rcx,%r14\n\txorq\t%rcx,%r15\n\taddq\t%r12,%rax\n\tadcq\t$0,%r13\n\tadcq\t$0,%r1
4\n\tadcq\t$0,%r15\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t0(%rdi),%rdx\n\tmovq\t8(%rdi),%rcx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%rbp,%r11\n\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\txorq\t%rbp,%rdx\n\txorq\t%rbp,%rcx\n\taddq\t%rax,%rdx\n\taddq\t%rax,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_256_n_shift_by_31,.-__smulq_256_n_shift_by_31\n.type\t__ab_approximation_31_256,@function\n.align\t32\n__ab_approximation_31_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t24(%rsi),%r9\n\tmovq\t56(%rsi),%r11\n\tmovq\t16(%rsi),%rbx\n\tmovq\t48(%rsi),%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t40(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t32(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tnotq\t%rax\n\tandq\t%rax,%r
9\n\tandq\t%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__ab_approximation_31_256,.-__ab_approximation_31_256\n.type\t__inner_loop_31_256,@function\n.align\t32\n__inner_loop_31_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\n.Loop_31_256:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edx\n\tjnz\t.Loop_31_256\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_31_256,.-__inner_loop_31_256\n\n.type\t__inner_loop_62_256,@function\n.align\t32\n__inner_loop_62_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t%edx,%r15d\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\n.Loop_62_256:\n\txorq\t%rax,%rax\n\ttestq\t%r14,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t%r14,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,
%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%r15d\n\tjnz\t.Loop_62_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_62_256,.-__inner_loop_62_256\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ct_inverse_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_inverse_mod_384\n.hidden\tct_inverse_mod_384\n.type\tct_inverse_mod_384, %function\n.align\t5\nct_inverse_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1056\n\n\tldp\tx22,   x4, [x1,#8*0]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx7, x8, [x1,#8*4]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#32+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #32+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstp\tx0, x3, [sp]\t\t// offload out_ptr, nx_ptr\n\n\tldp\tx9, x10, [x2,#8*0]\n\tldp\tx11, x12, [x2,#8*2]\n\tldp\tx13, x14, [x2,#8*4]\n\n\tstp\tx22,   x4, [x1,#8*0]\t// copy input to |a|\n\tstp\tx5, x6, [x1,#8*2]\n\tstp\tx7, x8, [x1,#8*4]\n\tstp\tx9, x10, [x1,#8*6]\t// copy modulus to |b|\n\tstp\tx11, x12, [x1,#8*8]\n\tstp\tx13, x14, [x1,#8*10]\n\n\t////////////////////////////////////////// first iteration\n\tmov\tx2, #62\n\tbl\t.Lab_approximation_62_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15,[x0,#8*12]\t\t// initialize |u| with |f0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15, [x0,#8*14]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\tx7, [x1,#8*12]\t// |u|\n\tldr\tx8, [x1,#8*20]\t// |v|\n\tmul\tx3, x20, x7\t\t// |u|*|f0|\n\tsmulh\tx4, x20, x7\n\tmul\tx5, x21, x8\t\t// |v|*|g0|\n\tsmulh\tx6, x21, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*6]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*8]\n\tstp\tx5, x5, [x0,#8*10]\n\n\tmul\tx3, x15, x7\t\t// |u|*|f1|\n\tsmulh\tx4, x15, x7\n\tmul\tx5, x16, x8\t\t// |v|*|g1|\n\tsmulh\tx6, x16, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*14]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*16]\n\tstp\tx5, x5, [x0,#8*18]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstr\tx27, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\t\t// sign extension\n\tstp\tx27, x27, [x0,#8*6]\n\tstp\tx27, x27, [x0,#8*8]\n\tstp\tx27, x27, [x0,#8*10]\n\teor\tx1, x1, #256\t\t// 
flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// 
|g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\t////////////////////////////////////////// iteration before last\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\t//bl\t__ab_approximation_62\t\t// 
|a| and |b| are exact,\n\tldp\tx3, x8, [x1,#8*0]\t// just load\n\tldp\tx9, x14, [x1,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tstr\tx3, [x0,#8*0]\n\tstr\tx9, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f0|\n\tmov\tx21, x16\t\t\t// exact |g0|\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*12\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f1|\n\tmov\tx21, x16\t\t\t// exact |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\t////////////////////////////////////////// last iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #24\t\t\t// 768 % 62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldr\tx3, [x1,#8*0]\t\t// just load\n\teor\tx8, x8, x8\n\tldr\tx9, [x1,#8*6]\n\teor\tx14, x14, x14\n\tbl\t__inner_loop_62\n\n\tmov\tx20, x17\n\tmov\tx21, x19\n\tldp\tx0, x15, [sp]\t\t\t// original out_ptr and n_ptr\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx23, x8, x21\t\t// figure out top-most limb\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x15,#8*0]\t// load |mod|\n\tadd\tx23, x23, x26\t\t// x23 is 1, 0 or -1\n\tldp\tx11, x12, [x15,#8*2]\n\tasr\tx22, x23, #63\t\t// sign as mask\n\tldp\tx13, x14, [x15,#8*4]\n\n\tand\tx26,   x9, x22\t\t// add mod<<384 conditionally\n\tand\tx27,   x10, x22\n\tadds\tx3, x3, x26\n\tand\tx28,   x11, x22\n\tadcs\tx4, x4, x27\n\tand\tx2,   x12, x22\n\tadcs\tx5, x5, x28\n\tand\tx26,   x13, x22\n\tadcs\tx6, x6, x2\n\tand\tx27,   x14, x22\n\tadcs\tx7, x7, x26\n\tadcs\tx8, x25,   x27\n\tadc\tx23, x23, xzr\t\t// x23 is 1, 0 or -1\n\n\tneg\tx22, x23\n\torr\tx23, x23, x22\t\t// excess bit or sign as mask\n\tasr\tx22, x22, #63\t\t// excess bit as mask\n\n\tand\tx9, x9, x23\t\t// mask |mod|\n\tand\tx10, x10, x23\n\tand\tx11, x11, 
x23\n\tand\tx12, x12, x23\n\tand\tx13, x13, x23\n\tand\tx14, x14, x23\n\n\teor\tx9,  x9, x22\t// conditionally negate |mod|\n\teor\tx10,  x10, x22\n\tadds\tx9,  x9, x22, lsr#63\n\teor\tx11,  x11, x22\n\tadcs\tx10,  x10, xzr\n\teor\tx12,  x12, x22\n\tadcs\tx11,  x11, xzr\n\teor\tx13, x13, x22\n\tadcs\tx12,  x12, xzr\n\teor\tx14, x14, x22\n\tadcs\tx13, x13, xzr\n\tadc\tx14, x14, xzr\n\n\tadds\tx3, x3, x9\t// final adjustment for |mod|<<384\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*8]\n\tadc\tx8, x8, x14\n\tstp\tx7, x8, [x0,#8*10]\n\n\tadd\tsp, sp, #1056\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_inverse_mod_384,.-ct_inverse_mod_384\n\n////////////////////////////////////////////////////////////////////////\n// see corresponding commentary in ctx_inverse_mod_384-x86_64...\n.type\t__smul_384x63, %function\n.align\t5\n__smul_384x63:\n\tldp\tx3, x4, [x1,#8*0+96]\t// load |u| (or |v|)\n\tasr\tx17, x20, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx5, x6, [x1,#8*2+96]\n\teor\tx20, x20, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx7, x8, [x1,#8*4+96]\n\n\teor\tx3, x3, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx25, [x1,#8*6+96]\n\tsub\tx20, x20, x17\n\teor\tx4, x4, x17\n\tadds\tx3, x3, x17, lsr#63\n\teor\tx5, x5, x17\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x17\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x3, x20\n\teor\tx8, x8, x17\n\tumulh\tx23, x4, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x5, x20\n\teor\tx25, x25, x17\n\tmul\tx3, x3, x20\n\tadcs\tx8, x8, xzr\n\tmul\tx4, x4, x20\n\tadcs\tx25, x25, xzr\n\tcmp\tx20, #0\n\tmul\tx5, x5, x20\n\tcsel\tx25, x25, xzr, 
ne\n\tadds\tx4, x4, x22\n\tumulh\tx22, x6, x20\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x7, x20\n\tmul\tx6, x6, x20\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x24\n\tmul\tx27,x8, x20\n\tadcs\tx7, x7, x22\n\tadcs\tx27,x27,x23\n\tadc\tx2, xzr, xzr\n\tldp\tx9, x10, [x1,#8*0+160]\t// load |u| (or |v|)\n\tasr\tx17, x21, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx11, x12, [x1,#8*2+160]\n\teor\tx21, x21, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx13, x14, [x1,#8*4+160]\n\n\teor\tx9, x9, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx26, [x1,#8*6+160]\n\tsub\tx21, x21, x17\n\teor\tx10, x10, x17\n\tadds\tx9, x9, x17, lsr#63\n\teor\tx11, x11, x17\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x17\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x17\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x9, x21\n\teor\tx14, x14, x17\n\tumulh\tx23, x10, x21\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x11, x21\n\teor\tx26, x26, x17\n\tmul\tx9, x9, x21\n\tadcs\tx14, x14, xzr\n\tmul\tx10, x10, x21\n\tadcs\tx26, x26, xzr\n\tadc\tx19, xzr, xzr\t\t// used in __smul_768x63_tail\n\tcmp\tx21, #0\n\tmul\tx11, x11, x21\n\tcsel\tx26, x26, xzr, ne\n\tadds\tx10, x10, x22\n\tumulh\tx22, x12, x21\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x13, x21\n\tmul\tx12, x12, x21\n\tmul\tx13, x13, x21\n\tadcs\tx12, x12, x24\n\tmul\tx28,x14, x21\n\tadcs\tx13, x13, x22\n\tadcs\tx28,x28,x23\n\tadc\tx2, x2, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*2]\n\tadcs\tx27,   x27,   x28\n\tstp\tx7, x27,   [x0,#8*4]\n\n\tret\n.size\t__smul_384x63,.-__smul_384x63\n\n.type\t__smul_768x63_tail, %function\n.align\t5\n__smul_768x63_tail:\n\tumulh\tx27, x8, x20\n\tldr\tx4, [x1,#8*27]// load rest of |v|\n\tadc\tx2, x2, xzr\n\tldp\tx5, x6, [x1,#8*28]\n\tand\tx25, x25, x20\n\tldp\tx7, x8, [x1,#8*30]\n\tsub\tx27, x27, x25\t// tie up |u|*|f1| chain\n\n\tumulh\tx14, x14, x21\t// resume |v|*|g1| chain\n\teor\tx4, x4, x17\t// conditionally 
negate rest of |v|\n\teor\tx5, x5, x17\n\teor\tx6, x6, x17\n\tadds\tx4, x4, x19\n\teor\tx7, x7, x17\n\tadcs\tx5, x5, xzr\n\teor\tx8, x8, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x26,   x21\n\tadcs\tx7, x7, xzr\n\tumulh\tx23, x4, x21\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x21\n\tadd\tx14, x14, x2\n\tumulh\tx25, x6, x21\n\tasr\tx28, x27, #63\n\tumulh\tx2, x7, x21\n\tmul\tx3, x26,   x21\n\tmul\tx4, x4, x21\n\tmul\tx5, x5, x21\n\tadds\tx3, x3, x14\n\tmul\tx6, x6, x21\n\tadcs\tx4, x4, x22\n\tmul\tx7, x7, x21\n\tadcs\tx5, x5, x23\n\tmul\tx22,   x8, x21\n\tadcs\tx6, x6, x24\n\tadcs\tx7, x7, x25\n\tadcs\tx25,   x22, x2\n\tadc\tx26, xzr, xzr\t\t// used in the final step\n\n\tadds\tx3, x3, x27\n\tadcs\tx4, x4, x28\n\tadcs\tx5, x5, x28\n\tadcs\tx6, x6, x28\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x28\n\tstp\tx5, x6, [x0,#8*8]\n\tadcs\tx25,   x25,   x28\t// carry is used in the final step\n\tstp\tx7, x25,   [x0,#8*10]\n\n\tret\n.size\t__smul_768x63_tail,.-__smul_768x63_tail\n\n.type\t__smul_384_n_shift_by_62, %function\n.align\t5\n__smul_384_n_shift_by_62:\n\tldp\tx3, x4, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx28, x15, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx5, x6, [x1,#8*2+0]\n\teor\tx2, x15, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx7, x8, [x1,#8*4+0]\n\n\teor\tx3, x3, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tumulh\tx22, x3, x2\n\tadcs\tx6, x6, xzr\n\tumulh\tx23, x4, x2\n\teor\tx8, x8, x28\n\tmul\tx3, x3, x2\n\tadcs\tx7, x7, xzr\n\tmul\tx4, x4, x2\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x6, x2\n\tadds\tx4, x4, x22\n\tmul\tx5, x5, x2\n\tumulh\tx22, x7, x2\n\tneg\tx28, x28\n\tmul\tx6, x6, x2\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x8, x2\n\tmul\tx7, x7, x2\n\tadcs\tx6, x6, x24\n\tmul\tx8, x8, x2\n\tadcs\tx7, x7, x25\n\tadcs\tx8, x8, 
x22\n\tadc\tx27, x23, x28\n\tldp\tx9, x10, [x1,#8*0+48]\t// load |a| (or |b|)\n\tasr\tx28, x16, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx11, x12, [x1,#8*2+48]\n\teor\tx2, x16, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx13, x14, [x1,#8*4+48]\n\n\teor\tx9, x9, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx10, x10, x28\n\tadds\tx9, x9, x28, lsr#63\n\teor\tx11, x11, x28\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x28\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x28\n\tumulh\tx22, x9, x2\n\tadcs\tx12, x12, xzr\n\tumulh\tx23, x10, x2\n\teor\tx14, x14, x28\n\tmul\tx9, x9, x2\n\tadcs\tx13, x13, xzr\n\tmul\tx10, x10, x2\n\tadc\tx14, x14, xzr\n\n\tumulh\tx24, x11, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x12, x2\n\tadds\tx10, x10, x22\n\tmul\tx11, x11, x2\n\tumulh\tx22, x13, x2\n\tneg\tx28, x28\n\tmul\tx12, x12, x2\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x14, x2\n\tmul\tx13, x13, x2\n\tadcs\tx12, x12, x24\n\tmul\tx14, x14, x2\n\tadcs\tx13, x13, x25\n\tadcs\tx14, x14, x22\n\tadc\tx28, x23, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x27,   x28\n\n\textr\tx3, x4, x3, #62\n\textr\tx4, x5, x4, #62\n\textr\tx5, x6, x5, #62\n\tasr\tx28, x9, #63\n\textr\tx6, x7, x6, #62\n\textr\tx7, x8, x7, #62\n\textr\tx8, x9, x8, #62\n\n\teor\tx3, x3, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x28\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x0,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x0,#8*4]\n\n\teor\tx15, x15, x28\n\teor\tx16, x16, x28\n\tsub\tx15, x15, x28\n\tsub\tx16, x16, x28\n\n\tret\n.size\t__smul_384_n_shift_by_62,.-__smul_384_n_shift_by_62\n.type\t__ab_approximation_62, %function\n.align\t4\n__ab_approximation_62:\n\tldp\tx7, x8, [x1,#8*4]\n\tldp\tx13, x14, [x1,#8*10]\n\tldp\tx5, x6, 
[x1,#8*2]\n\tldp\tx11, x12, [x1,#8*8]\n\n.Lab_approximation_62_loaded:\n\torr\tx22, x8, x14\t// check top-most limbs, ...\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx22, x8, x14\t// ... ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tldp\tx3, x4, [x1,#8*0]\n\tldp\tx9, x10, [x1,#8*6]\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx22, x8, x14\t// ... and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx22, x8, x14\n\tcsel\tx13, x13, x10, ne\n\n\tclz\tx22, x22\n\tcmp\tx22, #64\n\tcsel\tx22, x22, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx23, x22\n\n\tlslv\tx8, x8, x22\t// align high limbs to the left\n\tlslv\tx14, x14, x22\n\tlsrv\tx7, x7, x23\n\tlsrv\tx13, x13, x23\n\tand\tx7, x7, x23, asr#6\n\tand\tx13, x13, x23, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tb\t__inner_loop_62\n\tret\n.size\t__ab_approximation_62,.-__ab_approximation_62\n.type\t__inner_loop_62, %function\n.align\t4\n__inner_loop_62:\n\tmov\tx15, #1\t\t// |f0|=1\n\tmov\tx16, #0\t\t// |g0|=0\n\tmov\tx17, #0\t\t// |f1|=0\n\tmov\tx19, #1\t\t// |g1|=1\n\n.Loop_62:\n\tsbfx\tx28, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tsubs\tx24, x9, x3\t// |b_|-|a_|\n\tand\tx22, x9, x28\n\tsbc\tx25, x14, x8\n\tand\tx23, x14, x28\n\tsubs\tx26, x3, x22\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx22, x15\n\tsbcs\tx27, x8, x23\n\tmov\tx23, x16\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx14, x14, x8, hs\n\tcsel\tx3, x26, x24, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx8, x27, x25, hs\n\tcsel\tx15, x15, x17,       hs\t// exchange |f0| and |f1|\n\tcsel\tx17, x17, x22,     hs\n\tcsel\tx16, x16, x19,       hs\t// exchange |g0| and |g1|\n\tcsel\tx19, x19, x23,     hs\n\textr\tx3, x8, x3, #1\n\tlsr\tx8, x8, 
#1\n\tand\tx22, x17, x28\n\tand\tx23, x19, x28\n\tadd\tx17, x17, x17\t\t// |f1|<<=1\n\tadd\tx19, x19, x19\t\t// |g1|<<=1\n\tsub\tx15, x15, x22\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx16, x16, x23\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, .Loop_62\n\n\tret\n.size\t__inner_loop_62,.-__inner_loop_62\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ct_is_square_mod_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tct_is_square_mod_384\n.hidden\tct_is_square_mod_384\n.type\tct_is_square_mod_384, %function\n.align\t5\nct_is_square_mod_384:\n\thint\t#PACI_HINT\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #512\n\n\tldp\tx3, x4, [x0,#8*0]\t\t// load input\n\tldp\tx5, x6, [x0,#8*2]\n\tldp\tx7, x8, [x0,#8*4]\n\n\tadd\tx0, sp, #255\t// find closest 256-byte-aligned spot\n\tand\tx0, x0, #-256\t// in the frame...\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\n\tldp\tx9, x10, [x1,#8*0]\t\t// load modulus\n\tldp\tx11, x12, [x1,#8*2]\n\tldp\tx13, x14, [x1,#8*4]\n\n\tstp\tx3, x4, [x0,#8*6]\t// copy input to |a|\n\tstp\tx5, x6, [x0,#8*8]\n\tstp\tx7, x8, [x0,#8*10]\n\tstp\tx9, x10, [x0,#8*0]\t// copy modulus to |b|\n\tstp\tx11, x12, [x0,#8*2]\n\tstp\tx13, x14, [x0,#8*4]\n\n\teor\tx2, x2, x2\t\t\t// init the .Legendre symbol\n\tmov\tx15, #24\t\t\t// 24 is 768/30-1\n\tb\t.Loop_is_square\n\n.align\t4\n.Loop_is_square:\n\tbl\t__ab_approximation_30\n\tsub\tx15, x15, #1\n\n\teor\tx1, x0, #128\t\t// pointer to dst |b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,csp,x1\n#endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\tx19, x16\t\t\t// |f0|\n\tmov\tx20, x17\t\t\t// |g0|\n\tadd\tx1,x1,#8*6\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\tx9, x10, [x1,#-8*6]\n\teor\tx0, x0, #128\t\t// flip-flop src |a|b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\tand\tx27, x27, x9\t\t// if |a| was negative,\n\tadd\tx2, x2, x27, lsr#1\t\t// adjust |L|\n\n\tcbnz\tx15, 
.Loop_is_square\n\n\t////////////////////////////////////////// last iteration\n\t//bl\t__ab_approximation_30\t\t// |a| and |b| are exact,\n\t//ldr\tx8, [x0,#8*6]\t\t// and loaded\n\t//ldr\tx14, [x0,#8*0]\n\tmov\tx15, #48\t\t\t// 48 is 768%30 + 30\n\tbl\t__inner_loop_48\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tand\tx0, x2, #1\n\teor\tx0, x0, #1\n\n\tadd\tsp, sp, #512\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tct_is_square_mod_384,.-ct_is_square_mod_384\n\n.type\t__smul_384_n_shift_by_30, %function\n.align\t5\n__smul_384_n_shift_by_30:\n\tldp\tx3, x4, [x0,#8*0+0]\t// load |b| (or |a|)\n\tasr\tx27, x20, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx5, x6, [x0,#8*2+0]\n\teor\tx20, x20, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx7, x8, [x0,#8*4+0]\n\n\teor\tx3, x3, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx20, x20, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tumulh\tx21, x3, x20\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x4, x20\n\teor\tx8, x8, x27\n\tumulh\tx23, x5, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x6, x20\n\tadc\tx8, x8, xzr\n\n\tumulh\tx25, x7, x20\n\tand\tx28, x20, x27\n\tumulh\tx26, x8, x20\n\tneg\tx28, x28\n\tmul\tx3, x3, x20\n\tmul\tx4, x4, x20\n\tmul\tx5, x5, x20\n\tadds\tx4, x4, x21\n\tmul\tx6, x6, x20\n\tadcs\tx5, x5, x22\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x23\n\tmul\tx8, x8, x20\n\tadcs\tx7, x7, x24\n\tadcs\tx8, x8 ,x25\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x0,#8*0+48]\t// load |b| (or |a|)\n\tasr\tx27, x19, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx11, x12, [x0,#8*2+48]\n\teor\tx19, x19, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx13, 
x14, [x0,#8*4+48]\n\n\teor\tx9, x9, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx19, x19, x27\n\teor\tx10, x10, x27\n\tadds\tx9, x9, x27, lsr#63\n\teor\tx11, x11, x27\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x27\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x27\n\tumulh\tx21, x9, x19\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x10, x19\n\teor\tx14, x14, x27\n\tumulh\tx23, x11, x19\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x12, x19\n\tadc\tx14, x14, xzr\n\n\tumulh\tx25, x13, x19\n\tand\tx28, x19, x27\n\tumulh\tx27, x14, x19\n\tneg\tx28, x28\n\tmul\tx9, x9, x19\n\tmul\tx10, x10, x19\n\tmul\tx11, x11, x19\n\tadds\tx10, x10, x21\n\tmul\tx12, x12, x19\n\tadcs\tx11, x11, x22\n\tmul\tx13, x13, x19\n\tadcs\tx12, x12, x23\n\tmul\tx14, x14, x19\n\tadcs\tx13, x13, x24\n\tadcs\tx14, x14 ,x25\n\tadc\tx27, x27, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x26,   x27\n\n\textr\tx3, x4, x3, #30\n\textr\tx4, x5, x4, #30\n\textr\tx5, x6, x5, #30\n\tasr\tx27, x9, #63\n\textr\tx6, x7, x6, #30\n\textr\tx7, x8, x7, #30\n\textr\tx8, x9, x8, #30\n\n\teor\tx3, x3, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x27\n\tstp\tx3, x4, [x1,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x1,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x1,#8*4]\n\n\tret\n.size\t__smul_384_n_shift_by_30,.-__smul_384_n_shift_by_30\n.type\t__ab_approximation_30, %function\n.align\t4\n__ab_approximation_30:\n\tldp\tx13, x14, [x0,#8*4]\t// |a| is still in registers\n\tldp\tx11, x12, [x0,#8*2]\n\n\torr\tx21, x8, x14\t// check top-most limbs, ...\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx21, x8, x14\t// ... 
ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx21, x8, x14\t// ... and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx21, x8, x14\t// and one more, ...\n\tcsel\tx13, x13, x10, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x3, ne\n\torr\tx21, x8, x14\n\tcsel\tx13, x13, x9, ne\n\n\tclz\tx21, x21\n\tcmp\tx21, #64\n\tcsel\tx21, x21, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx22, x21\n\n\tlslv\tx8, x8, x21\t// align high limbs to the left\n\tlslv\tx14, x14, x21\n\tlsrv\tx7, x7, x22\n\tlsrv\tx13, x13, x22\n\tand\tx7, x7, x22, asr#6\n\tand\tx13, x13, x22, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tbfxil\tx8, x3, #0, #32\n\tbfxil\tx14, x9, #0, #32\n\n\tb\t__inner_loop_30\n\tret\n.size\t__ab_approximation_30,.-__ab_approximation_30\n\n.type\t__inner_loop_30, %function\n.align\t4\n__inner_loop_30:\n\tmov\tx28, #30\n\tmov\tx17, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx20, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx27,#0x7FFFFFFF7FFFFFFF\n\n.Loop_30:\n\tsbfx\tx24, x8, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x8, x14\n\tsub\tx28, x28, #1\n\tand\tx21, x14, x24\n\n\tsub\tx22, x14, x8\t\t// |b_|-|a_|\n\tsubs\tx23, x8, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\t// L + (a_ & b_) >> 1\n\tmov\tx21, x20\n\tcsel\tx14, x14, x8, hs\t// |b_| = |a_|\n\tcsel\tx8, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx20, x20, x17,  hs\t// exchange |fg0| and |fg1|\n\tcsel\tx17, x17, x21, hs\n\tcsel\tx2,   x2,   x25, hs\n\tlsr\tx8, x8, #1\n\tand\tx21, x20, x24\n\tand\tx22, x27, x24\n\tadd\tx23, x14, #2\n\tsub\tx17, x17, x21\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx20, x20, x20\t// |f1|<<=1\n\tadd\tx2, x2, x23, 
lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\tadd\tx17, x17, x22\n\tsub\tx20, x20, x27\n\n\tcbnz\tx28, .Loop_30\n\n\tmov\tx27, #0x7FFFFFFF\n\tubfx\tx16, x17, #0, #32\n\tubfx\tx17, x17, #32, #32\n\tubfx\tx19, x20, #0, #32\n\tubfx\tx20, x20, #32, #32\n\tsub\tx16, x16, x27\t\t// remove the bias\n\tsub\tx17, x17, x27\n\tsub\tx19, x19, x27\n\tsub\tx20, x20, x27\n\n\tret\n.size\t__inner_loop_30,.-__inner_loop_30\n.type\t__inner_loop_48, %function\n.align\t4\n__inner_loop_48:\n.Loop_48:\n\tsbfx\tx24, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x3, x9\n\tsub\tx15, x15, #1\n\tand\tx21, x9, x24\n\tsub\tx22, x9, x3\t\t// |b_|-|a_|\n\tsubs\tx23, x3, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx3, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx2,   x2,   x25, hs\n\tadd\tx23, x9, #2\n\tlsr\tx3, x3, #1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\n\tcbnz\tx15, .Loop_48\n\n\tret\n.size\t__inner_loop_48,.-__inner_loop_48\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ct_is_square_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tct_is_square_mod_384\n.hidden\tct_is_square_mod_384\n.type\tct_is_square_mod_384,@function\n.align\t32\nct_is_square_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$536,%rsp\n.cfi_adjust_cfa_offset\t536\n\n\n\tleaq\t24+255(%rsp),%rax\n\tandq\t$-256,%rax\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbx\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rdx\n\tmovq\t40(%rsi),%rdi\n\tmovq\t%rax,%rsi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rcx,72(%rax)\n\tmovq\t%rdx,80(%rax)\n\tmovq\t%rdi,88(%rax)\n\n\txorq\t%rbp,%rbp\n\tmovl\t$24,%ecx\n\tjmp\t.Loop_is_square\n\n.align\t32\n.Loop_is_square:\n\tmovl\t%ecx,16(%rsp)\n\n\tcall\t__ab_approximation_30\n\tmovq\t%rax,0(%rsp)\n\tmovq\t%rbx,8(%rsp)\n\n\tmovq\t$128+48,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovl\t16(%rsp),%ecx\n\txorq\t$128,%rsi\n\n\tandq\t48(%rdi),%r14\n\tshrq\t$1,%r14\n\taddq\t%r14,%rbp\n\n\tsubl\t$1,%ecx\n\tjnz\t.Loop_is_square\n\n\n\n\n\tmovq\t48(%rsi),%r9\n\tcall\t__inner_loop_48\n\n\tmovq\t$1,%rax\n\tandq\t%rbp,%rax\n\txorq\t$1,%rax\n\n\tleaq\t536(%rsp),%r8\n\tmovq\t0(%r8),%r15\
n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-536-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tct_is_square_mod_384,.-ct_is_square_mod_384\n\n.type\t__smulq_384_n_shift_by_30,@function\n.align\t32\n__smulq_384_n_shift_by_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,%r14\n\tandq\t%rbx,%r14\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rd
x,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,%r15\n\tandq\t%rbx,%r15\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\tleaq\t-48(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$30,%r9,%r8\n\tshrdq\t$30,%r10,%r9\n\tshrdq\t$30,%r11,%r10\n\tshrdq\t$30,%r12,%r11\n\tshrdq\t$30,%r13,%r12\n\tshrdq\t$30,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbx,%rbx\n\tsubq\t%r14,%rbx\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_384_n_shift_by_30,.-__smulq_384_n_shift_by_30\n.type\t__ab_approximation_30,@function\n.align\t32\n__ab_approximation_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t88(%rsi),%rbx\n\tmovq\t80(%rsi),%r15\n\tmovq\t72(%rsi),%r14\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r11,%r12\n\tmovq\t64(
%rsi),%r11\n\tcmovzq\t%r14,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r10,%r12\n\tmovq\t56(%rsi),%r10\n\tcmovzq\t%r11,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r9,%r12\n\tmovq\t48(%rsi),%r9\n\tcmovzq\t%r10,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r8,%r12\n\tcmovzq\t%r9,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r13\n\tcmovzq\t%r9,%rbx\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%r12,%r13\n\tshldq\t%cl,%r15,%rbx\n\n\tmovq\t$0xFFFFFFFF00000000,%rax\n\tmovl\t%r8d,%r8d\n\tmovl\t%r9d,%r9d\n\tandq\t%rax,%r13\n\tandq\t%rax,%rbx\n\torq\t%r13,%r8\n\torq\t%rbx,%r9\n\n\tjmp\t__inner_loop_30\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__ab_approximation_30,.-__ab_approximation_30\n.type\t__inner_loop_30,@function\n.align\t32\n__inner_loop_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rbx\n\tmovq\t$0x800000007FFFFFFF,%rcx\n\tleaq\t-1(%rbx),%r15\n\tmovl\t$30,%edi\n\n.Loop_30:\n\tmovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbx,%r12\n\tmovq\t%rcx,%r13\n\tmovq\t%rbp,%r14\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rcx,%rbx\n\tcmovbq\t%r12,%rcx\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\tsubq\t%rcx,%rbx\n\taddq\t%r15,%rbx\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbx\n\tcmovzq\t%r13,%rcx\n\tcmovzq\t%r14,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rcx,%rcx\n\tleaq\t(%rax,%rbp,1),%rbp\n\tsubq\t%r15,%rcx\n\n\tsubl\t$1,%edi\n\tjnz\t.Loop_30\n\n\tshrq\t$32,%r15\n\tmovl\t%ebx,%eax\n\tshrq\t$32,%rbx\n\tmovl\t%ecx,%edx\n\tshrq\t$32,%rcx\n\tsubq\t%r15,%rax\n\tsubq\t%r1
5,%rbx\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_30,.-__inner_loop_30\n\n.type\t__inner_loop_48,@function\n.align\t32\n__inner_loop_48:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t$48,%edi\n\n.Loop_48:\n\tmovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbp,%r12\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rax,%rbp\n\n\tsubl\t$1,%edi\n\tjnz\t.Loop_48\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_48,.-__inner_loop_48\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ctq_inverse_mod_384-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tct_inverse_mod_384\n.hidden\tct_inverse_mod_384\n.type\tct_inverse_mod_384,@function\n.align\t32\nct_inverse_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tct_inverse_mod_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1112,%rsp\n.cfi_adjust_cfa_offset\t1112\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\
n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\n\tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp
),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__
smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tca
ll\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t48(%rsi),%r10\n\tmovq\t56(%rsi),%r11\n\tcall\t__inner_loop_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r10,48(%rdi)\n\n\n\n\tleaq\t96(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\n\txorq\t$256+96,%rsi\n\tmovl\t$24,%edi\n\n\tmovq\t0(%rsi),%r8\n\txorq\t%r9,%r9\n\tmovq\t48(%rsi),%r10\n\txorq\t%r11,%r11\n\tcall\t__inner_loop_62\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r
10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1112-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tct_inverse_mod_384,.-ct_inverse_mod_384\n.type\t__smulq_768x63,@function\n.align\t32\n__smulq_768x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmovq\t%r9,8(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmovq\t%r10,16(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tm
ovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%r11,24(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmovq\t%r12,32(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tmovq\t%rdx,%rsi\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rsi\n\taddq\t%rax,%rsi\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rbx\n\txorq\t%rdx,%rbp\n\txorq\t%rdx,%rcx\n\txorq\t%rdx,%rdi\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t$0,%rdi\n\n\tmulq\t%rsi\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rsi\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rsi\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rsi\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rsi\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\tmulq\t%rsi\n\taddq\t%rax,%rbx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\tmulq\t%rsi\n\taddq\t%rax,%rbp\n\tmovq\t%rcx,%rax\n\tadcq
\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rdi\n\timulq\t%rsi\n\tmovq\t8(%rsp),%rsi\n\taddq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rsi),%r8\n\tadcq\t8(%rsi),%r9\n\tadcq\t16(%rsi),%r10\n\tadcq\t24(%rsi),%r11\n\tadcq\t32(%rsi),%r12\n\tadcq\t40(%rsi),%r13\n\tadcq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%rdi\n\tadcq\t%rdi,%r15\n\tadcq\t%rdi,%rbx\n\tadcq\t%rdi,%rbp\n\tadcq\t%rdi,%rcx\n\tadcq\t%rdi,%rax\n\tadcq\t%rdi,%rdx\n\n\tleaq\t(%rsi),%rdi\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_768x63,.-__smulq_768x63\n.type\t__smulq_384x63,@function\n.align\t32\n__smulq_384x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\tadd
q\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t56(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,%r15\n\tmovq\t%r14,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t-56(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_384x63,.-__smulq_384x63\n.type\t__smulq_384_n_shift_by_62,@function\n.align\t32\n__smulq_384_n_shift_by_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx
,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r15\n\tnegq\t%r15\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadc
q\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\n\tleaq\t-48(%rsi),%rsi\n\tmovq\t%rbx,%rdx\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$62,%r9,%r8\n\tshrdq\t$62,%r10,%r9\n\tshrdq\t$62,%r11,%r10\n\tshrdq\t$62,%r12,%r11\n\tshrdq\t$62,%r13,%r12\n\tshrdq\t$62,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulq_384_n_shift_by_62,.-__smulq_384_n_shift_by_62\n.type\t__ab_approximation_62,@function\n.align\t32\n__ab_approximation_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t16(%rsi),%r8\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t0(%rsi),%r8\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%r
cx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tjmp\t__inner_loop_62\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__ab_approximation_62,.-__ab_approximation_62\n.type\t__inner_loop_62,@function\n.align\t8\n.long\t0\n__inner_loop_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\tmovq\t%rsi,8(%rsp)\n\n.Loop_62:\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbp\n\tmovq\t%r11,%r14\n\tcmovnzq\t%r10,%rax\n\tcmovnzq\t%r11,%rbx\n\tsubq\t%r8,%rbp\n\tsbbq\t%r9,%r14\n\tmovq\t%r8,%r15\n\tmovq\t%r9,%rsi\n\tsubq\t%rax,%r8\n\tsbbq\t%rbx,%r9\n\tcmovcq\t%rbp,%r8\n\tcmovcq\t%r14,%r9\n\tcmovcq\t%r15,%r10\n\tcmovcq\t%rsi,%r11\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrdq\t$1,%r9,%r8\n\tshrq\t$1,%r9\n\ttestq\t$1,%r15\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\t.Loop_62\n\n\tmovq\t8(%rsp),%rsi\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_62,.-__inner_loop_62\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/ctx_inverse_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\tctx_inverse_mod_384\n.hidden\tctx_inverse_mod_384\n.type\tctx_inverse_mod_384,@function\n.align\t32\nctx_inverse_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nct_inverse_mod_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1112,%rsp\n.cfi_adjust_cfa_offset\t1112\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\
t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\n\tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\t
call\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%r
di\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_s
hift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmo
vq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%r
di\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%
rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$55,%edi\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t48(%rsi),%r10\n\n\tcall\t__tail_loop_55\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulx_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n
\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1112-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tctx_inverse_mod_384,.-ctx_inverse_mod_384\n.type\t__smulx_768x63,@function\n.align\t32\n__smulx_768x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rax
\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\tmovq\t%rcx,%rax\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tsarq\t$63,%rax\n\txorq\t%rsi,%rsi\n\tsubq\t%rax,%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rsi,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\txorq\t%rax,%r15\n\txorq\t%rax,%rbx\n\txorq\t%rax,%rbp\n\txorq\t%rax,%rcx\n\txorq\t%rdi,%rax\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t$0,%rax\n\n\tmulxq\t%r8,%r8,%rsi\n\tmulxq\t%r9,%r9,%rdi\n\taddq\t%rsi,%r9\n\tmulxq\t%r10,%r10,%rsi\n\tadcq\t%rdi,%r10\n\tmulxq\t%r11,%r11,%rdi\n\tadcq\t%rsi,%r11\n\tmulxq\t%r12,%r12,%rsi\n\tadcq\t%rdi,%r12\n\tmulxq\t%r13,%r13,%rdi\n\tadcq\t%rsi,%r13\n
\tmulxq\t%r14,%r14,%rsi\n\tadcq\t%rdi,%r14\n\tmulxq\t%r15,%r15,%rdi\n\tadcq\t%rsi,%r15\n\tmulxq\t%rbx,%rbx,%rsi\n\tadcq\t%rdi,%rbx\n\tmulxq\t%rbp,%rbp,%rdi\n\tadcq\t%rsi,%rbp\n\tmulxq\t%rcx,%rcx,%rsi\n\tadcq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rsi\n\timulq\t%rdx\n\taddq\t%rsi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%rsi\n\tadcq\t%rsi,%r15\n\tadcq\t%rsi,%rbx\n\tadcq\t%rsi,%rbp\n\tadcq\t%rsi,%rcx\n\tadcq\t%rsi,%rax\n\tadcq\t%rsi,%rdx\n\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulx_768x63,.-__smulx_768x63\n.type\t__smulx_384x63,@function\n.align\t32\n__smulx_384x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\tmovq\t0+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tmovq\t%rcx,%rdx
\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,%r15\n\tmovq\t%r14,%rbx\n\tmovq\t56+0(%rsi),%r8\n\tmovq\t56+8(%rsi),%r9\n\tmovq\t56+16(%rsi),%r10\n\tmovq\t56+24(%rsi),%r11\n\tmovq\t56+32(%rsi),%r12\n\tmovq\t56+40(%rsi),%r13\n\tmovq\t56+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulx_384x63,.-__smulx_384x63\n.type\t__smulx_384_n_shift_by_31,@function\n.align\t32\n__smulx_384_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r
11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,%r15\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r
8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulx_384_n_shift_by_31,.-__smulx_384_n_shift_by_31\n.type\t__smulx_191_n_shift_by_31,@function\n.align\t32\n__smulx_191_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%r10,%rax\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r10\n\taddq\t%rbp,%r9\n\tadcq\t$0,%r10\n\timulq\t%rdx\n\taddq\t%rax,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmovq\t%rcx,%rdx\n\tmovq\t48+0(%rsi),%r11\n\tmovq\t48+8(%rsi),%r12\n\tmovq\t48+16(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%r13,%rax\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%rax\n\n\tmulxq\t%r11,%r11,%rbp\n\tmulxq\t%r12,%r12,%r13\n\taddq\t%rbp,%r12\n\tadcq\t$0,%r13\n\timulq\t%rdx\n\taddq\t%rax,%r13\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t%r9,%r12\n\tadcq\t%r10,%r13\n\tadcq\t%rdx,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r11,0(%rdi)\n\tmovq\t%r12,8(%rdi)\n\tmovq\t%r13,16(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\
n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__smulx_191_n_shift_by_31,.-__smulx_191_n_shift_by_31\n.type\t__ab_approximation_31,@function\n.align\t32\n__ab_approximation_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t16(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t8(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tandnq\t%r9,%rax,%r9\n\tandnq\t%r11,%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__ab_approximation_31,.-__ab_approximation_31\n.type\t__inner_loop_31,@function\n.align\t32\n__inner_loop_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\n.Loop_31:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\
t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edi\n\tjnz\t.Loop_31\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__inner_loop_31,.-__inner_loop_31\n\n.type\t__tail_loop_55,@function\n.align\t32\n__tail_loop_55:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\n.Loop_55:\n\txorq\t%rax,%rax\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t$1,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\t.Loop_55\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__tail_loop_55,.-__tail_loop_55\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/div3w-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tdiv_3_limbs\n.hidden\tdiv_3_limbs\n.type\tdiv_3_limbs,%function\n.align\t5\ndiv_3_limbs:\n\thint\t#34\n\tldp\tx4,x5,[x0]\t// load R\n\teor\tx0,x0,x0\t// Q = 0\n\tmov\tx3,#64\t\t// loop counter\n\tnop\n\n.Loop:\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tcsel\tx4,x4,x6,lo\t// select between R and R - D\n\textr\tx1,x2,x1,#1\t// D >>= 1\n\tcsel\tx5,x5,x7,lo\n\tlsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\tsub\tx3,x3,#1\n\tcbnz\tx3,.Loop\n\n\tasr\tx3,x0,#63\t// top bit -> mask\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\n\torr\tx0,x0,x3\t// all ones if overflow\n\n\tret\n.size\tdiv_3_limbs,.-div_3_limbs\n.globl\tquot_rem_128\n.hidden\tquot_rem_128\n.type\tquot_rem_128,%function\n.align\t5\nquot_rem_128:\n\thint\t#34\n\tldp\tx3,x4,[x1]\n\n\tmul\tx5,x3,x2\t// divisor[0:1} * quotient\n\tumulh\tx6,x3,x2\n\tmul\tx11,  x4,x2\n\tumulh\tx7,x4,x2\n\n\tldp\tx8,x9,[x0]\t// load 3 limbs of the dividend\n\tldr\tx10,[x0,#16]\n\n\tadds\tx6,x6,x11\n\tadc\tx7,x7,xzr\n\n\tsubs\tx8,x8,x5\t// dividend - divisor * quotient\n\tsbcs\tx9,x9,x6\n\tsbcs\tx10,x10,x7\n\tsbc\tx5,xzr,xzr\t\t// borrow -> mask\n\n\tadd\tx2,x2,x5\t// if borrowed, adjust the quotient ...\n\tand\tx3,x3,x5\n\tand\tx4,x4,x5\n\tadds\tx8,x8,x3\t// ... 
and add divisor\n\tadc\tx9,x9,x4\n\n\tstp\tx8,x9,[x0]\t// save 2 limbs of the remainder\n\tstr\tx2,[x0,#16]\t// and one limb of the quotient\n\n\tmov\tx0,x2\t\t// return adjusted quotient\n\n\tret\n.size\tquot_rem_128,.-quot_rem_128\n\n.globl\tquot_rem_64\n.hidden\tquot_rem_64\n.type\tquot_rem_64,%function\n.align\t5\nquot_rem_64:\n\thint\t#34\n\tldr\tx3,[x1]\n\tldr\tx8,[x0]\t// load 1 limb of the dividend\n\n\tmul\tx5,x3,x2\t// divisor * quotient\n\n\tsub\tx8,x8,x5\t// dividend - divisor * quotient\n\n\tstp\tx8,x2,[x0]\t// save remainder and quotient\n\n\tmov\tx0,x2\t\t// return quotient\n\n\tret\n.size\tquot_rem_64,.-quot_rem_64\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/div3w-x86_64.s",
    "content": ".text\t\n\n.globl\tdiv_3_limbs\n.hidden\tdiv_3_limbs\n.type\tdiv_3_limbs,@function\n.align\t32\ndiv_3_limbs:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\txorq\t%rax,%rax\n\tmovl\t$64,%ecx\n\n.Loop:\n\tmovq\t%r8,%r10\n\tsubq\t%rsi,%r8\n\tmovq\t%r9,%r11\n\tsbbq\t%rdx,%r9\n\tleaq\t1(%rax,%rax,1),%rax\n\tmovq\t%rdx,%rdi\n\tcmovcq\t%r10,%r8\n\tcmovcq\t%r11,%r9\n\tsbbq\t$0,%rax\n\tshlq\t$63,%rdi\n\tshrq\t$1,%rsi\n\tshrq\t$1,%rdx\n\torq\t%rdi,%rsi\n\tsubl\t$1,%ecx\n\tjnz\t.Loop\n\n\tleaq\t1(%rax,%rax,1),%rcx\n\tsarq\t$63,%rax\n\n\tsubq\t%rsi,%r8\n\tsbbq\t%rdx,%r9\n\tsbbq\t$0,%rcx\n\n\torq\t%rcx,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tdiv_3_limbs,.-div_3_limbs\n.globl\tquot_rem_128\n.hidden\tquot_rem_128\n.type\tquot_rem_128,@function\n.align\t32\nquot_rem_128:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t0(%rsi)\n\tmovq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r9\n\tadcq\t$0,%rdx\n\n\tmovq\t0(%rdi),%r10\n\tmovq\t8(%rdi),%r11\n\tmovq\t16(%rdi),%rax\n\n\tsubq\t%r8,%r10\n\tsbbq\t%r9,%r11\n\tsbbq\t%rdx,%rax\n\tsbbq\t%r8,%r8\n\n\taddq\t%r8,%rcx\n\tmovq\t%r8,%r9\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\taddq\t%r8,%r10\n\tadcq\t%r9,%r11\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%r11,8(%rdi)\n\tmovq\t%rcx,16(%rdi)\n\n\tmovq\t%rcx,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tquot_rem_128,.-quot_rem_128\n\n\n\n\n\n.globl\tquot_rem_64\n.hidden\tquot_rem_64\n.type\tquot_rem_64,@function\n.align\t32\nquot_rem_64:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,
%rax\n\timulq\t0(%rsi),%rdx\n\n\tmovq\t0(%rdi),%r10\n\n\tsubq\t%rdx,%r10\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%rax,8(%rdi)\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tquot_rem_64,.-quot_rem_64\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mul_mont_256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tmul_mont_sparse_256\n.hidden\tmul_mont_sparse_256\n.type\tmul_mont_sparse_256,%function\n.align\t5\nmul_mont_sparse_256:\n\thint\t#34\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldr\tx9,        [x2]\n\tldp\tx12,x13,[x1,#16]\n\n\tmul\tx19,x10,x9\n\tldp\tx5,x6,[x3]\n\tmul\tx20,x11,x9\n\tldp\tx7,x8,[x3,#16]\n\tmul\tx21,x12,x9\n\tmul\tx22,x13,x9\n\n\tumulh\tx14,x10,x9\n\tumulh\tx15,x11,x9\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tumulh\tx17,x13,x9\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,xzr,    x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*1]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*2]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\
n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*3]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tadcs\tx20,x21,x15\n\tadcs\tx21,x22,x16\n\tadcs\tx22,x23,x17\n\tadc\tx23,xzr,xzr\n\n\tsubs\tx14,x19,x5\n\tsbcs\tx15,x20,x6\n\tsbcs\tx16,x21,x7\n\tsbcs\tx17,x22,x8\n\tsbcs\txzr,    
x23,xzr\n\n\tcsel\tx19,x19,x14,lo\n\tcsel\tx20,x20,x15,lo\n\tcsel\tx21,x21,x16,lo\n\tcsel\tx22,x22,x17,lo\n\n\tstp\tx19,x20,[x0]\n\tstp\tx21,x22,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\tret\n.size\tmul_mont_sparse_256,.-mul_mont_sparse_256\n.globl\tsqr_mont_sparse_256\n.hidden\tsqr_mont_sparse_256\n.type\tsqr_mont_sparse_256,%function\n.align\t5\nsqr_mont_sparse_256:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tmov\tx4,x3\n\n\t////////////////////////////////////////////////////////////////\n\t//  |  |  |  |  |  |a1*a0|  |\n\t//  |  |  |  |  |a2*a0|  |  |\n\t//  |  |a3*a2|a3*a0|  |  |  |\n\t//  |  |  |  |a2*a1|  |  |  |\n\t//  |  |  |a3*a1|  |  |  |  |\n\t// *|  |  |  |  |  |  |  | 2|\n\t// +|a3*a3|a2*a2|a1*a1|a0*a0|\n\t//  |--+--+--+--+--+--+--+--|\n\t//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10\n\t//\n\t//  \"can't overflow\" below mark carrying into high part of\n\t//  multiplication result, which can't overflow, because it\n\t//  can never be all ones.\n\n\tmul\tx11,x6,x5\t// a[1]*a[0]\n\tumulh\tx15,x6,x5\n\tmul\tx12,x7,x5\t// a[2]*a[0]\n\tumulh\tx16,x7,x5\n\tmul\tx13,x8,x5\t// a[3]*a[0]\n\tumulh\tx19,x8,x5\n\n\tadds\tx12,x12,x15\t// accumulate high parts of multiplication\n\tmul\tx14,x7,x6\t// a[2]*a[1]\n\tumulh\tx15,x7,x6\n\tadcs\tx13,x13,x16\n\tmul\tx16,x8,x6\t// a[3]*a[1]\n\tumulh\tx17,x8,x6\n\tadc\tx19,x19,xzr\t// can't overflow\n\n\tmul\tx20,x8,x7\t// a[3]*a[2]\n\tumulh\tx21,x8,x7\n\n\tadds\tx15,x15,x16\t// accumulate high parts of multiplication\n\tmul\tx10,x5,x5\t// a[0]*a[0]\n\tadc\tx16,x17,xzr\t// can't overflow\n\n\tadds\tx13,x13,x14\t// accumulate low parts of multiplication\n\tumulh\tx5,x5,x5\n\tadcs\tx19,x19,x15\n\tmul\tx15,x6,x6\t// 
a[1]*a[1]\n\tadcs\tx20,x20,x16\n\tumulh\tx6,x6,x6\n\tadc\tx21,x21,xzr\t// can't overflow\n\n\tadds\tx11,x11,x11\t// acc[1-6]*=2\n\tmul\tx16,x7,x7\t// a[2]*a[2]\n\tadcs\tx12,x12,x12\n\tumulh\tx7,x7,x7\n\tadcs\tx13,x13,x13\n\tmul\tx17,x8,x8\t// a[3]*a[3]\n\tadcs\tx19,x19,x19\n\tumulh\tx8,x8,x8\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadc\tx22,xzr,xzr\n\n\tadds\tx11,x11,x5\t// +a[i]*a[i]\n\tadcs\tx12,x12,x15\n\tadcs\tx13,x13,x6\n\tadcs\tx19,x19,x16\n\tadcs\tx20,x20,x7\n\tadcs\tx21,x21,x17\n\tadc\tx22,x22,x8\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadds\tx10,x10,x19\t// accumulate upper half\n\tadcs\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadc\tx19,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x19,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_sparse_256,.-sqr_mont_sparse_256\n.globl\tfrom_mont_256\n.hidden\tfrom_mont_256\n.type\tfrom_mont_256,%function\n.align\t5\nfrom_mont_256:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tfrom_mont_256,.-from_mont_256\n\n.globl\tredc_mont_256\n.hidden\tredc_mont_256\n.type\tredc_mont_256,%function\n.align\t5\nredc_mont_256:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\t
add\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx16,x17,[x1,#48]\n\n\tadds\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadcs\tx12,x12,x16\n\tadcs\tx13,x13,x17\n\tadc\tx9,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x9,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tredc_mont_256,.-redc_mont_256\n\n.type\t__mul_by_1_mont_256,%function\n.align\t5\n__mul_by_1_mont_256:\n\tmul\tx3,x4,x10\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx
17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tadc\tx13,x9,x17\n\n\tret\n.size\t__mul_by_1_mont_256,.-__mul_by_1_mont_256\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mul_mont_384-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n.text\n\n.globl\tadd_mod_384x384\n.hidden\tadd_mod_384x384\n.type\tadd_mod_384x384,%function\n.align\t5\nadd_mod_384x384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tadd_mod_384x384,.-add_mod_384x384\n\n.type\t__add_mod_384x384,%function\n.align\t5\n__add_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tadcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tadcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tadcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tstp\tx11,x12,[x0,#48]\n\tcsel\tx15,x15,x23,lo\n\tstp\tx13,x14,[x0,#64]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n.size\t__add_mod_384x384,.-__add_mod_384x384\n\n.globl\tsub_mod_384x384\n.hidden\tsub_mod_384x384\n.type\tsub_mod_384x384,%function\n.align\t5\nsub_mod_384x384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsub_mod_384x384,.-sub_mod_384x384\n\n.type\t__sub_mod_384x384,%function\n.align\t5\n__sub_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tsbcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tsbcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tsbcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n.size\t__sub_mod_384x384,.-__sub_mod_384x384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tstp\tx11,x12,[x0]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tmul_mont_384x\n.hidden\tmul_mont_384x\n.type\tmul_mont_384x,%function\n.align\t5\nmul_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#288\t\t// space for 3 768-bit vectors\n\n\tmov\tx26,x0\t\t// save r_ptr\n\tmov\tx27,x1\t\t// save a_ptr\n\tmov\tx28,x2\t\t// save b_ptr\n\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tadd\tx1,x1,#48\n\tadd\tx2,x2,#48\n\tadd\tx0,sp,#96\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tsub\tx2,x1,#48\n\tadd\tx0,sp,#240\n\tbl\t__add_mod_384\n\n\tadd\tx1,x28,#0\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#192\n\tbl\t__add_mod_384\n\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\t\t// mul_384(t2, a->re+a->im, b->re+b->im)\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,x0\n\tadd\tx2,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,sp,#96\n\tbl\t__sub_mod_384x384\t// t2 = t2-t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#96\n\tadd\tx0,sp,#0\n\tbl\t__sub_mod_384x384\t// t0 = 
t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tadd\tx1,sp,#192\n\tadd\tx0,x0,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#288\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_mont_384x,.-mul_mont_384x\n\n.globl\tsqr_mont_384x\n.hidden\tsqr_mont_384x\n.type\tsqr_mont_384x,%function\n.align\t5\nsqr_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 2 384-bit vectors\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tadd\tx0,sp,#0\n\tbl\t__add_mod_384\t\t// t0 = a->re + a->im\n\n\tadd\tx0,sp,#48\n\tbl\t__sub_mod_384\t\t// t1 = a->re - a->im\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx11,x11,x11\t// add with itself\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx19,x11,x19,lo\n\tcsel\tx20,x12,x20,lo\n\tcsel\tx21,x13,x21,lo\n\tldp\tx11,x12,[sp]\n\tcsel\tx22,x14,x22,lo\n\tldr\tx17,        
[sp,#48]\n\tcsel\tx23,x15,x23,lo\n\tldp\tx13,x14,[sp,#16]\n\tcsel\tx24,x16,x24,lo\n\tldp\tx15,x16,[sp,#32]\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_384x,.-sqr_mont_384x\n\n.globl\tmul_mont_384\n.hidden\tmul_mont_384\n.type\tmul_mont_384,%function\n.align\t5\nmul_mont_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        
[x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_mont_384,.-mul_mont_384\n\n.type\t__mul_mont_384,%function\n.align\t5\n__mul_mont_384:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    x3\n\tmul\tx3,x10,x4\n\tmov\tx17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*1]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*2]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*3]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*4]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*5]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t// pull r_ptr\n\tadc\tx17,x17,xzr\n\n\tadds\tx19,x20,x26\n\tadcs\tx20,x21,x27\n\tadcs\tx21,x22,x28\n\tadcs\tx22,x23,x0\n\tadcs\tx23,x24,x1\n\tadcs\tx24,x25,x3\n\tadc\tx25,x17,xzr\n\n\tsubs\tx26,x19,x5\n\tsbcs\tx27,x20,x6\n\tsbcs\tx28,x21,x7\n\tsbcs\tx0,x22,x8\n\tsbcs\tx1,x23,x9\n\tsbcs\tx3,x24,x10\n\tsbcs\txzr,    
x25,xzr\n\n\tcsel\tx11,x19,x26,lo\n\tcsel\tx12,x20,x27,lo\n\tcsel\tx13,x21,x28,lo\n\tcsel\tx14,x22,x0,lo\n\tcsel\tx15,x23,x1,lo\n\tcsel\tx16,x24,x3,lo\n\tret\n.size\t__mul_mont_384,.-__mul_mont_384\n\n.globl\tsqr_mont_384\n.hidden\tsqr_mont_384\n.type\tsqr_mont_384,%function\n.align\t5\nsqr_mont_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tmov\tx3,x0\t\t// save r_ptr\n\tmov\tx0,sp\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tmov\tx1,sp\n\tmov\tx0,x3\t\t// restore r_ptr\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_384,.-sqr_mont_384\n\n.globl\tsqr_n_mul_mont_383\n.hidden\tsqr_n_mul_mont_383\n.type\tsqr_n_mul_mont_383,%function\n.align\t5\nsqr_n_mul_mont_383:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx17,x5\t\t\t// save 
b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\tmov\tx0,sp\n.Loop_sqr_383:\n\tbl\t__sqr_384\n\tsub\tx2,x2,#1\t// counter\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,sp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// just accumulate upper half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tcbnz\tx2,.Loop_sqr_383\n\n\tmov\tx2,x17\n\tldr\tx17,[x17]\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_n_mul_mont_383,.-sqr_n_mul_mont_383\n.type\t__sqr_384,%function\n.align\t5\n__sqr_384:\n\tmul\tx19,x12,x11\n\tmul\tx20,x13,x11\n\tmul\tx21,x14,x11\n\tmul\tx22,x15,x11\n\tmul\tx23,x16,x11\n\n\tumulh\tx6,x12,x11\n\tumulh\tx7,x13,x11\n\tumulh\tx8,x14,x11\n\tumulh\tx9,x15,x11\n\tadds\tx20,x20,x6\n\tumulh\tx10,x16,x11\n\tadcs\tx21,x21,x7\n\tmul\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tmul\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tmul\tx9,x15,x12\n\tadc\tx24,xzr,    x10\n\tmul\tx10,x16,x12\n\n\tadds\tx21,x21,x7\n\tumulh\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x12\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x12\n\tadc\tx25,xzr,xzr\n\n\tmul\tx5,x11,x11\n\tadds\tx22,x22,x7\n\tumulh\tx11,  
x11,x11\n\tadcs\tx23,x23,x8\n\tmul\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tmul\tx9,x15,x13\n\tadc\tx25,x25,x10\n\tmul\tx10,x16,x13\n\n\tadds\tx23,x23,x8\n\tumulh\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tumulh\tx9,x15,x13\n\tadcs\tx25,x25,x10\n\tumulh\tx10,x16,x13\n\tadc\tx26,xzr,xzr\n\n\tmul\tx6,x12,x12\n\tadds\tx24,x24,x8\n\tumulh\tx12,  x12,x12\n\tadcs\tx25,x25,x9\n\tmul\tx9,x15,x14\n\tadc\tx26,x26,x10\n\tmul\tx10,x16,x14\n\n\tadds\tx25,x25,x9\n\tumulh\tx9,x15,x14\n\tadcs\tx26,x26,x10\n\tumulh\tx10,x16,x14\n\tadc\tx27,xzr,xzr\n\tmul\tx7,x13,x13\n\tadds\tx26,x26,x9\n\tumulh\tx13,  x13,x13\n\tadc\tx27,x27,x10\n\tmul\tx8,x14,x14\n\n\tmul\tx10,x16,x15\n\tumulh\tx14,  x14,x14\n\tadds\tx27,x27,x10\n\tumulh\tx10,x16,x15\n\tmul\tx9,x15,x15\n\tadc\tx28,x10,xzr\n\n\tadds\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadcs\tx22,x22,x22\n\tadcs\tx23,x23,x23\n\tadcs\tx24,x24,x24\n\tadcs\tx25,x25,x25\n\tadcs\tx26,x26,x26\n\tumulh\tx15,  x15,x15\n\tadcs\tx27,x27,x27\n\tmul\tx10,x16,x16\n\tadcs\tx28,x28,x28\n\tumulh\tx16,  
x16,x16\n\tadc\tx1,xzr,xzr\n\n\tadds\tx19,x19,x11\n\tadcs\tx20,x20,x6\n\tadcs\tx21,x21,x12\n\tadcs\tx22,x22,x7\n\tadcs\tx23,x23,x13\n\tadcs\tx24,x24,x8\n\tadcs\tx25,x25,x14\n\tstp\tx5,x19,[x0]\n\tadcs\tx26,x26,x9\n\tstp\tx20,x21,[x0,#16]\n\tadcs\tx27,x27,x15\n\tstp\tx22,x23,[x0,#32]\n\tadcs\tx28,x28,x10\n\tstp\tx24,x25,[x0,#48]\n\tadc\tx16,x16,x1\n\tstp\tx26,x27,[x0,#64]\n\tstp\tx28,x16,[x0,#80]\n\n\tret\n.size\t__sqr_384,.-__sqr_384\n.globl\tsqr_384\n.hidden\tsqr_384\n.type\tsqr_384,%function\n.align\t5\nsqr_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_384,.-sqr_384\n\n.globl\tredc_mont_384\n.hidden\tredc_mont_384\n.type\tredc_mont_384,%function\n.align\t5\nredc_mont_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing 
b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tredc_mont_384,.-redc_mont_384\n\n.globl\tfrom_mont_384\n.hidden\tfrom_mont_384\n.type\tfrom_mont_384,%function\n.align\t5\nfrom_mont_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tfrom_mont_384,.-from_mont_384\n\n.type\t__mul_by_1_mont_384,%function\n.align\t5\n__mul_by_1_mont_384:\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tmul\tx26,x4,x11\n\tldp\tx15,x16,[x1,#32]\n\n\t// 
mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\tret\n.size\t__mul_by_1_mont_384,.-__mul_by_1_mont_384\n\n.type\t__redc_tail_mont_384,%function\n.align\t5\n__redc_tail_mont_384:\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// accumulate upper 
half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n.size\t__redc_tail_mont_384,.-__redc_tail_mont_384\n\n.globl\tmul_384\n.hidden\tmul_384\n.type\tmul_384,%function\n.align\t5\nmul_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_384,.-mul_384\n\n.type\t__mul_384,%function\n.align\t5\n__mul_384:\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\n\tumulh\tx5,x11,x17\n\tumulh\tx6,x12,x17\n\tumulh\tx7,x13,x17\n\tumulh\tx8,x14,x17\n\tumulh\tx9,x15,x17\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,8*1]\n\n\tstr\tx19,[x0]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,xzr,    
x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(1+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*1]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(2+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*2]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(3+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*3]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(4+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*4]\n\t
adds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*5]\n\tadds\tx19,x20,x5\n\tadcs\tx20,x21,x6\n\tadcs\tx21,x22,x7\n\tadcs\tx22,x23,x8\n\tadcs\tx23,x24,x9\n\tadc\tx24,x25,x10\n\n\tstp\tx19,x20,[x0,#48]\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tret\n.size\t__mul_384,.-__mul_384\n\n.globl\tmul_382x\n.hidden\tmul_382x\n.type\tmul_382x,%function\n.align\t5\nmul_382x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for two 384-bit vectors\n\n\tldp\tx11,x12,[x1]\n\tmov\tx26,x0\t\t// save r_ptr\n\tldp\tx19,x20,[x1,#48]\n\tmov\tx27,x1\t\t// save a_ptr\n\tldp\tx13,x14,[x1,#16]\n\tmov\tx28,x2\t\t// save b_ptr\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx15,x16,[x1,#32]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx6,x12,x20\n\tldp\tx11,x12,[x2]\n\tadcs\tx7,x13,x21\n\tldp\tx19,x20,[x2,#48]\n\tadcs\tx8,x14,x22\n\tldp\tx13,x14,[x2,#16]\n\tadcs\tx9,x15,x23\n\tldp\tx21,x22,[x2,#64]\n\tadc\tx10,x16,x24\n\tldp\tx15,x16,[x2,#32]\n\n\tstp\tx5,x6,[sp]\n\tadds\tx5,x11,x19\t// t1 = b->re + 
b->im\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx6,x12,x20\n\tstp\tx7,x8,[sp,#16]\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tstp\tx9,x10,[sp,#32]\n\tadcs\tx9,x15,x23\n\tstp\tx5,x6,[sp,#48]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[sp,#64]\n\tstp\tx9,x10,[sp,#80]\n\n\tbl\t__mul_384\t\t// mul_384(ret->re, a->re, b->re)\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#48\n\tadd\tx0,x26,#96\n\tbl\t__mul_384\n\n\tadd\tx1,x27,#48\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tadd\tx1,x26,#96\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,x26,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,x26,#0\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tmul_382x,.-mul_382x\n\n.globl\tsqr_382x\n.hidden\tsqr_382x\n.type\tsqr_382x,%function\n.align\t5\nsqr_382x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx13,x14,[x1,#16]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx21,x22,[x1,#64]\n\tadcs\tx6,x12,x20\n\tldp\tx15,x16,[x1,#32]\n\tadcs\tx7,x13,x21\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx8,x14,x22\n\tstp\tx5,x6,[x0]\n\tadcs\tx9,x15,x23\n\tldp\tx5,x6,[x2]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[x0,#16]\n\n\tsubs\tx11,x11,x19\t// t1 = a->re - 
a->im\n\tldp\tx7,x8,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tstp\tx9,x10,[x0,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx9,x10,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x25\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x25\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x25\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x25\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tmov\tx4,x1\t\t// save a_ptr\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\n\n\tadd\tx1,x4,#0\n\tadd\tx2,x4,#48\n\tadd\tx0,x0,#96\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x0]\n\tldp\tx13,x14,[x0,#16]\n\tadds\tx11,x11,x11\t// add with itself\n\tldp\tx15,x16,[x0,#32]\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadcs\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tstp\tx11,x12,[x0]\n\tadcs\tx21,x21,x21\n\tstp\tx13,x14,[x0,#16]\n\tadcs\tx22,x22,x22\n\tstp\tx15,x16,[x0,#32]\n\tadcs\tx23,x23,x23\n\tstp\tx19,x20,[x0,#48]\n\tadc\tx24,x24,x24\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_382x,.-sqr_382x\n\n.globl\tsqr_mont_382x\n.hidden\tsqr_mont_382x\n.type\tsqr_mont_382x,%function\n.align\t5\nsqr_mont_382x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants 
them there\n\tsub\tsp,sp,#112\t\t// space for two 384-bit vectors + word\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx17,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx5,x11,x17\t// t0 = a->re + a->im\n\tadcs\tx6,x12,x20\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tadcs\tx9,x15,x23\n\tadc\tx10,x16,x24\n\n\tsubs\tx19,x11,x17\t// t1 = a->re - a->im\n\tsbcs\tx20,x12,x20\n\tsbcs\tx21,x13,x21\n\tsbcs\tx22,x14,x22\n\tsbcs\tx23,x15,x23\n\tsbcs\tx24,x16,x24\n\tsbc\tx25,xzr,xzr\t\t// borrow flag as mask\n\n\tstp\tx5,x6,[sp]\n\tstp\tx7,x8,[sp,#16]\n\tstp\tx9,x10,[sp,#32]\n\tstp\tx19,x20,[sp,#48]\n\tstp\tx21,x22,[sp,#64]\n\tstp\tx23,x24,[sp,#80]\n\tstr\tx25,[sp,#96]\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx19,x11,x11\t// add with itself\n\tadcs\tx20,x12,x12\n\tadcs\tx21,x13,x13\n\tadcs\tx22,x14,x14\n\tadcs\tx23,x15,x15\n\tadc\tx24,x16,x16\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tldp\tx11,x12,[sp]\n\tldr\tx17,[sp,#48]\n\tldp\tx13,x14,[sp,#16]\n\tldp\tx15,x16,[sp,#32]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldr\tx25,[sp,#96]\t// account for sign from a->re - 
a->im\n\tldp\tx19,x20,[sp]\n\tldp\tx21,x22,[sp,#16]\n\tldp\tx23,x24,[sp,#32]\n\n\tand\tx19,x19,x25\n\tand\tx20,x20,x25\n\tand\tx21,x21,x25\n\tand\tx22,x22,x25\n\tand\tx23,x23,x25\n\tand\tx24,x24,x25\n\n\tsubs\tx11,x11,x19\n\tsbcs\tx12,x12,x20\n\tsbcs\tx13,x13,x21\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tand\tx21,x7,x25\n\tand\tx22,x8,x25\n\tand\tx23,x9,x25\n\tand\tx24,x10,x25\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#112\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsqr_mont_382x,.-sqr_mont_382x\n\n.type\t__mul_mont_383_nonred,%function\n.align\t5\n__mul_mont_383_nonred:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*1]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*2]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,x
zr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*3]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*4]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx
27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*5]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t\t// pull 
r_ptr\n\n\tadds\tx11,x20,x26\n\tadcs\tx12,x21,x27\n\tadcs\tx13,x22,x28\n\tadcs\tx14,x23,x0\n\tadcs\tx15,x24,x1\n\tadcs\tx16,x25,x3\n\n\tret\n.size\t__mul_mont_383_nonred,.-__mul_mont_383_nonred\n\n.globl\tsgn0_pty_mont_384\n.hidden\tsgn0_pty_mont_384\n.type\tsgn0_pty_mont_384,%function\n.align\t5\nsgn0_pty_mont_384:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsgn0_pty_mont_384,.-sgn0_pty_mont_384\n\n.globl\tsgn0_pty_mont_384x\n.hidden\tsgn0_pty_mont_384x\n.type\tsgn0_pty_mont_384x,%function\n.align\t5\nsgn0_pty_mont_384x:\n\thint\t#PACI_HINT\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx
1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tadd\tx1,x1,#48\n\n\tand\tx2,x11,#1\n\torr\tx3,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\torr\tx3,x3,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx2,x2,x17\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\torr\tx1,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\torr\tx1,x1,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t\t// pack sign and parity\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#AUTI_HINT\n\tret\n.size\tsgn0_pty_mont_384x,.-sgn0_pty_mont_384x\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mulq_mont_256-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tmul_mont_sparse_256\n.hidden\tmul_mont_sparse_256\n.type\tmul_mont_sparse_256,@function\n.align\t32\nmul_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r13\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%rbp\n\tmovq\t%rdx,%rbx\n\n\tmovq\t%rax,%r15\n\tmulq\t%r13\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_mont_sparse_256,.-mul_mont_sparse_256\n\n.globl\tsqr_mont_sparse_256\n.hidden\tsqr_mont_sparse_256\n.type\tsqr_mont_sparse_256,@function\n.align\t32\nsqr_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t%rcx,%r8\n\tmovq\t8(%rsi),%r14\n\tmovq\t%rdx,%rcx\n\tmovq\t16(%rsi),%r12\n\tleaq\t(%rsi),%rbx\n\tmovq\t24(%rsi),%rbp\n\n\tmovq\t%rax,%r15\n\tmulq\t%rax\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_mont_sparse_256,.-sqr_mont_sparse_256\n.type\t__mulq_mont_sparse_256,@function\n.align\t32\n__mulq_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulq\t%r14\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t%r12\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\txorq\t%r14,%r14\n\tmovq\t%rdx,%r13\n\n\tmovq\t%r9,%rdi\n\timulq\t%r8,%r9\n\n\n\tmovq\t%rax,%r15\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tm
ulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\txorq\t%r15,%r15\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tmovq\t%r10,%rdi\n\timulq\t%r8,%r10\n\n\n\tmovq\t%rax,%r9\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\txorq\t%r9,%r9\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%r9\n\tmovq\t%r11,%rdi\n\timulq\t%r8,%r11\n\n\n\tmovq\t%rax,%r10\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r1
0,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r9\n\txorq\t%r10,%r10\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r15\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\timulq\t%r8,%rax\n\tmovq\t8(%rsp),%rsi\n\n\n\tmovq\t%rax,%r11\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\tmovq\t%r14,%rbx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\n\n\n\tmovq\t%r15,%r12\n\tsubq\t0(%rcx),%r13\n\tsbbq\t8(%rcx),%r14\n\tsbbq\t16(%rcx),%r15\n\tmovq\t%r9,%rbp\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t$0,%r10\n\n\tcmovcq\t%rax,%r13\n\tcmovcq\t%rbx,%r14\n\tcmovcq\t%r12,%r15\n\tmovq\t%r13,0(%rsi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r14,8(%rsi)\n\tmovq\t%r15,16(%rsi)\n\tmovq\t%r9,24(%rsi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\t__mulq_mont_sparse_256,.-__mulq_mont_sparse_256\n.globl\tfrom_mont_256\n.hidden\tfrom_mont_256\n.type\tfrom_mont_256,@function\n.al
ign\t32\nfrom_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tfrom_mont_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r14,%r10\n\tmovq\t%r15,%r11\n\tmovq\t%r9,%r12\n\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tsbbq\t24(%rbx),%r9\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tfrom_mont_256,.-from_mont_256\n\n.globl\tredc_mont_256\n.hidden\tredc_mont_256\n.type\tredc_mont_256,@function\n.align\t32\nredc_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tredc_mont_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\taddq\t32(%rsi),%r13\n\tadcq\t40(%rsi),%r14\n\tmovq\t%r13,%rax\n\tadcq\t48(%rsi),%r15\n\tmovq\t%r14,%r10\n\tadcq\t56(%rsi),%r9\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r15,%r11\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tmovq\t%r9,%r12\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tredc_mont_256,.-redc_mont_256\n.type\t__mulq_by_1_mont_256,@function\n.align\t32\n__mulq_by_1_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r10\n\tmovq\t16(%rsi),%r11\n\tmovq\t24(%rsi),%r12\n\n\tmovq\t%rax,%r13\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r13\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r10\n\tadcq\t$0,%rdx\n\tmov
q\t%rdx,%r13\n\n\tmulq\t16(%rbx)\n\tmovq\t%r10,%r14\n\timulq\t%rcx,%r10\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r9\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulq_by_1_mont_256,.-__mulq_by_1_mont_256\n\n.section\t.note.GNU-stack,\"\",@progbits
\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mulq_mont_384-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n\n\n\n\n\n\n.type\t__subq_mod_384x384,@function\n.align\t32\n__subq_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__subq_mod_384x384,.-__subq_mod_384x384\n\n.type\t__addq_mod_384,@function\n.align\t32\n__addq_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r1
3\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__addq_mod_384,.-__addq_mod_384\n\n.type\t__subq_mod_384,@function\n.align\t32\n__subq_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subq_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__subq_mod_384,.-__subq_mod_384\n.globl\tmul_mont_384x\n.hidden\tmul_mont_384x\n.type\tmul_mont_384x,@function\n.align\t32\nmul_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$328,%rsp\n.cfi_adjust_cfa_offset\t328\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t48(%rsi),%rsi\n\tleaq\t40+96(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rsi),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__addq_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulq_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tcall\t__subq_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subq_mod_384x384\n\n\tmovq\t%rcx,%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-328-8*6\n\n\t\n#ifdef\t__SGX_LVI_HAR
DENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_mont_384x,.-mul_mont_384x\n.globl\tsqr_mont_384x\n.hidden\tsqr_mont_384x\n.type\tsqr_mont_384x,@function\n.align\t32\nsqr_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tcall\t__mulq_mont_384\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tmovq\t%r14,%r12\n\tadcq\t%r9,%r9\n\tmovq\t%r15,%r13\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%rax\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r10,%rbp\n\tsbbq\t16(%rcx),%r8\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r12,%r14\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rax,%r8\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%rbx,%r9\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,64(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n
\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_mont_384x,.-sqr_mont_384x\n\n.globl\tmul_382x\n.hidden\tmul_382x\n.type\tmul_382x,@function\n.align\t32\nmul_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r10\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r12\n\tmovq\t4
0(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulq_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subq_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_382x,.-mul_382x\n.globl\tsqr_382x\n.hidden\tsqr_382x\n.type\tsqr_382x,@function\n.align\t32\nsqr_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rcx\n\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subq_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulq_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%rdi
)\n\tadcq\t%rbp,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*7\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_382x,.-sqr_382x\n.globl\tmul_384\n.hidden\tmul_384\n.type\tmul_384,@function\n.align\t32\nmul_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_384\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_384,.-mul_384\n\n.type\t__mulq_384,@function\n.align\t32\n__mulq_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rax\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)
\n\taddq\t%rax,%r11\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,16(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,24(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\
n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,32(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,40(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rax,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rcx,48(%rdi)\n\tmovq\t%r8,5
6(%rdi)\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r10,72(%rdi)\n\tmovq\t%r11,80(%rdi)\n\tmovq\t%r12,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulq_384,.-__mulq_384\n.globl\tsqr_384\n.hidden\tsqr_384\n.type\tsqr_384,@function\n.align\t32\nsqr_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__sqrq_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_384,.-sqr_384\n\n.type\t__sqrq_384,@function\n.align\t32\n__sqrq_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rcx\n\tmovq\t24(%rsi),%rbx\n\n\n\tmovq\t%rax,%r14\n\tmulq\t%r15\n\tmovq\t%rax,%r9\n\tmovq\t%r14,%rax\n\tmovq\t32(%rsi),%rbp\n\tmovq\t%rdx,%r10\n\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t40(%rsi),%rsi\n\tmovq\t%rdx,%r11\n\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\
n\tmovq\t%rdx,%r13\n\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t%rax\n\txorq\t%r8,%r8\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r15,%rax\n\taddq\t%r9,%r9\n\tadcq\t$0,%r8\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r8\n\tmovq\t%r9,8(%rdi)\n\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t%rax\n\txorq\t%r9,%r9\n\taddq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\taddq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%r9\n\taddq\t%r8,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r9\n\tmovq\t%r10,16(%rdi)\n\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rbp\n\taddq\t%rax,%r14\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\n\tmulq\t%rax\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%rbx,%rax\n\taddq\t%r12,%r12\n\tadcq\t%r13,%r13\n\tadcq\t$0,%r11\n\taddq\t%r9,%r12\n\tadcq\t%rdx,%r13\n\tadcq\t$0,%r11\n\tmovq\t%r12,32(%rdi)\n\n\n\tmulq\t%rbp\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\n\tmulq\t%rax\n\txorq\t%r12,%r12\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t$0,%r12\n\taddq\t%r11,%r14\n\tadcq\t%rdx,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t$0,%r12\n\tmovq\t%r15,56(%rdi)\n\n\n\tmulq\t%rsi\n\taddq\t%rax,%r
bx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t%rax\n\txorq\t%r13,%r13\n\taddq\t%rax,%r12\n\tmovq\t%rsi,%rax\n\taddq\t%rcx,%rcx\n\tadcq\t%rbx,%rbx\n\tadcq\t$0,%r13\n\taddq\t%r12,%rcx\n\tadcq\t%rdx,%rbx\n\tmovq\t%rcx,64(%rdi)\n\tadcq\t$0,%r13\n\tmovq\t%rbx,72(%rdi)\n\n\n\tmulq\t%rax\n\taddq\t%r13,%rax\n\taddq\t%rbp,%rbp\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rax,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__sqrq_384,.-__sqrq_384\n\n.globl\tsqr_mont_384\n.hidden\tsqr_mont_384\n.type\tsqr_mont_384,@function\n.align\t32\nsqr_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$120,%rsp\n.cfi_adjust_cfa_offset\t8*15\n\n\n\tmovq\t%rcx,96(%rsp)\n\tmovq\t%rdx,104(%rsp)\n\tmovq\t%rdi,112(%rsp)\n\n\tmovq\t%rsp,%rdi\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rsp),%rsi\n\tmovq\t96(%rsp),%rcx\n\tmovq\t104(%rsp),%rbx\n\tmovq\t112(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t120(%rsp),%r8\n\tmovq\t120(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*21\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0x
f3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_mont_384,.-sqr_mont_384\n\n\n\n.globl\tredc_mont_384\n.hidden\tredc_mont_384\n.type\tredc_mont_384,@function\n.align\t32\nredc_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tredc_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tredc_mont_384,.-redc_mont_384\n\n\n\n\n.globl\tfrom_mont_384\n.hidden\tfrom_mont_384\n.type\tfrom_mont_384,@function\n.align\t32\nfrom_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tfrom_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\n\n\n\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tfrom_mont_384,.-from_mont_384\n.type\t__mulq_by_1_mont_384,@function\n.align\t32\n__mulq_by_1_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%
rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,%r15\n\timulq\t%rcx,%r9\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,%r8\n\timulq\t%rcx,%r10\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,%r9\n\timulq\t%rcx,%r11\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t
%rdx,%r8\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx
)\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulq_by_1_mont_384,.-__mulq_by_1_mont_384\n\n.type\t__redq_tail_mont_384,@function\n.align\t32\n__redq_tail_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__redq_tail_mont_384,.-__redq_tail_mont_384\n\n.globl\tsgn0_pty_mont_384\n.hidden\tsgn0_pty_mont_384\n.type\tsgn0_pty_mont_384,@function\n.align\t32\nsgn0_pty_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\txorq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0_pty_mont_384,.-sgn0_pty_mont_384\n\n.globl\tsgn0_pty_mont_384x\n.hidden\tsgn0_pty_mont_384x\n.type\tsgn0_pty_mont_384x,@function\n.align\t32\nsgn0_pty_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rb
x\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0_pty_mont_384x,.-sgn0_pty_mont_384x\n.globl\tmul_mont_384\n.hidden\tmul_mont_384\n.type\tmul_mont_384,@function\n.align\t32\nmul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tmul_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\tmovq\t%rdx,%rbx\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\n\tcall\t__mulq_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-72\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmul_mont_384,.-mul_mont_384\n.type\t__mulq_mont_384,@function\n.align\t32\n__mulq_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rdi\n\tmulq\t%r14\n\tmovq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tm
ovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%rbp\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\txorq\t%r15,%r15\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\tadcq\t$0,%r15\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r9,%rbp\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\txorq\t%r8,%r8\n\taddq\t%rax,%r14\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rbp\n\n\tmul
q\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r10,%rbp\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\txorq\t%r9,%r9\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%
rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r11,%rbp\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\txorq\t%r10,%r10\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%
r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r12,%rbp\n\timulq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmovq\t%r13,%rbp\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rsi)\n\taddq\t%r12,%r10\n\tadcq\t$0,%rdx\n\tx
orq\t%r12,%r12\n\taddq\t%rax,%r10\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t16(%rsp),%rdi\n\tsubq\t0(%rcx),%r14\n\tmovq\t%r15,%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r8,%rbx\n\tsbbq\t16(%rcx),%r8\n\tmovq\t%r9,%rsi\n\tsbbq\t24(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rdx,%r15\n\tcmovcq\t%rbx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%r13,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulq_mont_384,.-__mulq_mont_384\n.globl\tsqr_n_mul_mont_384\n.hidden\tsqr_n_mul_mont_384\n.type\tsqr_n_mul_mont_384,@function\n.align\t32\nsqr_n_mul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t8*17\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\n.Loop_sqr_384:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovd\t%xmm1,%edx\n\tleaq\t0(%rdi),%rsi\n\tdecl\t%edx\n\tjnz\t.Loop_sqr_384\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*23\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_n_mul_mont_384,.-sqr_n_mul_mont_384\n\n.globl\tsqr_n_mul_mont_383\n.hidden\tsqr_n_mul_mont_383\n.type\tsqr_n_mul_mont_383,@function\n.align\t32\nsqr_n_mul_mont_383:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_383$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t8*17\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\n.Loop_sqr_383:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovd\t%xmm1,%edx\n\taddq\t48(%rsi),%r14\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tleaq\t0(%rdi),%rsi\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tdecl\t%edx\n\tjnz\t.Loop_sqr_383\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*23\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_n_mul_mont_383,.-sqr_n_mul_mont_383\n.type\t__mulq_mont_383_nonred,@function\n.align\t32\n__mulq_mont_383_nonred:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rbp\n\tmulq\t%r14\n
\tmovq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%r15\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rcx)\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t%rdx,%r14\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rsi)\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r1
4\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rcx)\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rcx)\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r
9\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t%rdx,%r8\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r11,%r10\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rcx)\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t%rdx,%r9\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16
(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rcx)\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t%rdx,%r10\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r13,%r12\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r10
\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t24(%rcx)\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r10\n\tadcq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulq_mont_383_nonred,.-__mulq_mont_383_nonred\n.globl\tsqr_mont_382x\n.hidden\tsqr_mont_382x\n.type\tsqr_mont_382x,@function\n.align\t32\nsqr_mont_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rdi,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t24(%rsp),%rdi\n\tcall\t__mulq_mont_383_nonred\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%r8,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%r
bx\n\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_383_nonred\n\tmovq\t32+96(%rsp),%rsi\n\tmovq\t32+0(%rsp),%r12\n\tmovq\t32+8(%rsp),%r13\n\tandq\t%rsi,%r12\n\tmovq\t32+16(%rsp),%rax\n\tandq\t%rsi,%r13\n\tmovq\t32+24(%rsp),%rbx\n\tandq\t%rsi,%rax\n\tmovq\t32+32(%rsp),%rbp\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t32+40(%rsp),%rsi\n\n\tsubq\t%r12,%r14\n\tmovq\t0(%rcx),%r12\n\tsbbq\t%r13,%r15\n\tmovq\t8(%rcx),%r13\n\tsbbq\t%rax,%r8\n\tmovq\t16(%rcx),%rax\n\tsbbq\t%rbx,%r9\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t%rbp,%r10\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t%rsi,%r11\n\tsbbq\t%rsi,%rsi\n\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r12,%r14\n\tadcq\t%r13,%r15\n\tadcq\t%rax,%r8\n\tadcq\t%rbx,%r9\n\tadcq\t%rbp,%r10\n\tadcq\t%rsi,%r11\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqr_mont_382x,.-sqr_mont_382x\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mulx_mont_256-x86_64.s",
    "content": ".text\t\n\n.globl\tmulx_mont_sparse_256\n.hidden\tmulx_mont_sparse_256\n.type\tmulx_mont_sparse_256,@function\n.align\t32\nmulx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nmul_mont_sparse_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmulx_mont_sparse_256,.-mulx_mont_sparse_256\n\n.globl\tsqrx_mont_sparse_256\n.hidden\tsqrx_mont_sparse_256\n.type\tsqrx_mont_sparse_256,@function\n.align\t32\nsqrx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_mont_sparse_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpus
hq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tmovq\t%rcx,%r8\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_mont_sparse_256,.-sqrx_mont_sparse_256\n.type\t__mulx_mont_sparse_256,@function\n.align\t32\n__mulx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulxq\t%r15,%r15,%r12\n\tmulxq\t%rbp,%rbp,%r13\n\taddq\t%r15,%r11\n\tmulxq\t%r9,%r9,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rbp,%r12\n\tadcq\t%r9,%r13\n\tadcq\t$0,%r14\n\n\tmovq\t%rax,%r10\n\timulq\t%r8,%rax\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r11\n\tadcxq\t%r9,%r12\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r15,%r9\n\tadoxq\t%r9,%r15\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r12\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r9,%r13\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r10,%r14\n\tadoxq\t%r10,%r15\n\tadcxq\t%r10,%r15\n\tadoxq\t%r10,%r10\n\tadcq\t$0,%
r10\n\tmovq\t%rax,%r11\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r10,%r9\n\tadoxq\t%r9,%r10\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r11\n\tadoxq\t%r12,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r13\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\tadcxq\t%r11,%r15\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\tadcq\t$0,%r11\n\tmovq\t%rax,%r12\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r10\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r10\n\tadcxq\t%r11,%r9\n\tadoxq\t%r9,%r11\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r13,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r14\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\tadoxq\t%r12,%r12\n\tadcq\t$0,%r12\n\timulq\t%r8,%rdx\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rcx),%r13,%r9\n\tadcxq\t%rax,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%r14,%rdx\n\tleaq\t128(%rcx),%rcx\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r9,%r11\n\tmovq\t%r15,%rax\n\tadc
xq\t%r13,%r11\n\tadoxq\t%r13,%r12\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t%r10,%rbp\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%r9\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rdx,%r14\n\tcmovcq\t%rax,%r15\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%r9,%r11\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulx_mont_sparse_256,.-__mulx_mont_sparse_256\n.globl\tfromx_mont_256\n.hidden\tfromx_mont_256\n.type\tfromx_mont_256,@function\n.align\t32\nfromx_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nfrom_mont_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r15,%rdx\n\tmovq\t%r10,%r12\n\tmovq\t%r11,%r13\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tsbbq\t24(%rbx),%r11\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\
n.cfi_endproc\t\n.size\tfromx_mont_256,.-fromx_mont_256\n\n.globl\tredcx_mont_256\n.hidden\tredcx_mont_256\n.type\tredcx_mont_256,@function\n.align\t32\nredcx_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nredc_mont_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\taddq\t32(%rsi),%r14\n\tadcq\t40(%rsi),%r15\n\tmovq\t%r14,%rax\n\tadcq\t48(%rsi),%r10\n\tmovq\t%r15,%rdx\n\tadcq\t56(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r10,%r12\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t24(%rbx),%r11\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tredcx_mont_256,.-redcx_mont_256\n.type\t__mulx_by_1_mont_256,@function\n.align\t32\n__mulx_by_1_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r11\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmo
vq\t%rax,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifd
ef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulx_by_1_mont_256,.-__mulx_by_1_mont_256\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/mulx_mont_384-x86_64.s",
    "content": ".text\t\n\n\n\n\n\n\n\n.type\t__subx_mod_384x384,@function\n.align\t32\n__subx_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__subx_mod_384x384,.-__subx_mod_384x384\n\n.type\t__addx_mod_384,@function\n.align\t32\n__addx_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\
tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__addx_mod_384,.-__addx_mod_384\n\n.type\t__subx_mod_384,@function\n.align\t32\n__subx_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subx_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__subx_mod_384,.-__subx_mod_384\n.globl\tmulx_mont_384x\n.hidden\tmulx_mont_384x\n.type\tmulx_mont_384x,@function\n.align\t32\nmulx_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nmul_m
ont_384x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$328,%rsp\n.cfi_adjust_cfa_offset\t328\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t128+48(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t(%rbx),%rsi\n\tleaq\t-48(%rbx),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulx_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rcx),%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.c
fi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-328-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmulx_mont_384x,.-mulx_mont_384x\n.globl\tsqrx_mont_384x\n.hidden\tsqrx_mont_384x\n.type\tsqrx_mont_384x,@function\n.align\t32\nsqrx_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_mont_384x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tmovq\t%rdx,%r8\n\tadcq\t%r12,%r12\n\tmovq\t%r15,%r9\n\tadcq\t%rdi,%rdi\n\tmovq\t%rax,%r10\n\tadcq\t%rbp,%rbp\n\tmovq\t%r12,%r11\n\tsbbq\t%rsi,%rsi\n\n\tsubq\t0(%rcx),%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rdi,%r13\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r12\n\tsbbq\t32(%rcx),%rdi\n\tmovq\t%rbp,%r14\n\tsbbq\t40(%rcx),%rbp\n\tsbbq\t$0,%rsi\n\n\tcmovcq\t%r8,%rdx\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tm
ovq\t%rdx,48(%rbx)\n\tcmovcq\t%r11,%r12\n\tmovq\t%r15,56(%rbx)\n\tcmovcq\t%r13,%rdi\n\tmovq\t%rax,64(%rbx)\n\tcmovcq\t%r14,%rbp\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_mont_384x,.-sqrx_mont_384x\n\n.globl\tmulx_382x\n.hidden\tmulx_382x\n.type\tmulx_382x,@function\n.align\t32\nmulx_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nmul_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r1
0\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r12\n\tmovq\t40(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulx_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48+128(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmulx_382x,.-mulx_382x\n.globl\tsqrx_382x\n.hidden\tsqrx_382x\n.type\tsqrx_382x,@function\n.align\t32\nsqrx_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa
_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rcx\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subx_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%rbp,%
rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*7\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_382x,.-sqrx_382x\n.globl\tmulx_384\n.hidden\tmulx_384\n.type\tmulx_384,@function\n.align\t32\nmulx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nmul_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-48\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmulx_384,.-mulx_384\n\n.type\t__mulx_384,@function\n.align\t32\n__mulx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%r14,%r9,%rcx\n\txorq\t%rbp,%r
bp\n\n\tmulxq\t%r15,%r8,%rax\n\tadcxq\t%rcx,%r8\n\tmovq\t%r9,0(%rdi)\n\n\tmulxq\t%r10,%r9,%rcx\n\tadcxq\t%rax,%r9\n\n\tmulxq\t%r11,%r10,%rax\n\tadcxq\t%rcx,%r10\n\n\tmulxq\t%r12,%r11,%rcx\n\tadcxq\t%rax,%r11\n\n\tmulxq\t%r13,%r12,%r13\n\tmovq\t8(%rbx),%rdx\n\tadcxq\t%rcx,%r12\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,8(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,16(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,24(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,32(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmul
xq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,40(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t%rax,%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tmovq\t%r10,64(%rdi)\n\tmovq\t%r11,72(%rdi)\n\tmovq\t%r12,80(%rdi)\n\tmovq\t%r13,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulx_384,.-__mulx_384\n.globl\tsqrx_384\n.hidden\tsqrx_384\n.type\tsqrx_384,@function\n.align\t32\nsqrx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__sqrx_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_
offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_384,.-sqrx_384\n.type\t__sqrx_384,@function\n.align\t32\n__sqrx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r15\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rbx\n\n\n\tmulxq\t%r14,%r8,%rdi\n\tmovq\t40(%rsi),%rbp\n\tmulxq\t%r15,%r9,%rax\n\taddq\t%rdi,%r9\n\tmulxq\t%rcx,%r10,%rdi\n\tadcq\t%rax,%r10\n\tmulxq\t%rbx,%r11,%rax\n\tadcq\t%rdi,%r11\n\tmulxq\t%rbp,%r12,%r13\n\tmovq\t%r14,%rdx\n\tadcq\t%rax,%r12\n\tadcq\t$0,%r13\n\n\n\txorq\t%r14,%r14\n\tmulxq\t%r15,%rdi,%rax\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rax,%r11\n\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rax,%r12\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%r15,%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%r14,%rax\n\tadcxq\t%rax,%r14\n\n\n\txorq\t%r15,%r15\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rax,%r14\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%r15,%rax\n\tadcxq\t%rax,%r15\n\n\n\txorq\t%rcx,%rcx\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rax,%r15\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%rbx,%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rcx,%rax\n\tadcxq\t%rax,%rcx\n\n\n\tmulxq\t%rbp,%rdi,%rbx\n\tmovq\t0(%rsi),%rdx\n\taddq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rbx\n\n\n\txorq\t%rbp,%rbp\n\tadcxq\t%r8,%r8\n\tadcxq\t%r9,%r9\n\tadcxq\t%r10,%r10\n\tadcxq\t%r11,%r11\n\tadcxq\t%r12,%r12\n\n\n\tmulxq\t%rdx,%rdx,%rax\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t8(%rsi),%rdx\n\tadoxq\t%rax,%r8\n\tmovq\t%r8,8(%rdi)\n\n\tmulxq\t%rdx,%r8,%rax\n\tmovq\t16(%rsi),%rdx\n\tadoxq\t%r8,%r9\n\tadoxq\t%rax,%r10\n\tmovq\t%r9,16(%rdi)\n\tmovq\t%r10,24(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t24(%rsi),%rdx\n\tadoxq\t%r8,%r11\n\tadoxq\t%r9,%r12\n\tadcxq
\t%r13,%r13\n\tadcxq\t%r14,%r14\n\tmovq\t%r11,32(%rdi)\n\tmovq\t%r12,40(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t32(%rsi),%rdx\n\tadoxq\t%r8,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r15,%r15\n\tadcxq\t%rcx,%rcx\n\tmovq\t%r13,48(%rdi)\n\tmovq\t%r14,56(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t40(%rsi),%rdx\n\tadoxq\t%r8,%r15\n\tadoxq\t%r9,%rcx\n\tadcxq\t%rbx,%rbx\n\tadcxq\t%rbp,%rbp\n\tmovq\t%r15,64(%rdi)\n\tmovq\t%rcx,72(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tadoxq\t%r8,%rbx\n\tadoxq\t%r9,%rbp\n\n\tmovq\t%rbx,80(%rdi)\n\tmovq\t%rbp,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__sqrx_384,.-__sqrx_384\n\n\n\n.globl\tredcx_mont_384\n.hidden\tredcx_mont_384\n.type\tredcx_mont_384,@function\n.align\t32\nredcx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nredc_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tredcx_mont_384,.-redcx_mont_384\n\n\n\n\n.globl\tfromx_mont_384\n.hidden\tfromx_mont_384\n.type\tfr
omx_mont_384,@function\n.align\t32\nfromx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nfrom_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\n\n\n\tmovq\t%r14,%rax\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tfromx_mont_384,.-fromx_mont_384\n.type\t__mulx_by_1_mont_384,@function\n.align\t32\n__mulx_by_1_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rcx,%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\timulq\t%r8,%rdx\n\n\n\txorq\t%r14,%r14\n\tmulxq\t0(%rbx),%rax,%rb
p\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r13\n\tadoxq\t%r14,%rbp\n\tadcxq\t%rbp,%r14\n\timulq\t%r9,%rdx\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r14\n\tadoxq\t%r15,%rbp\n\tadcxq\t%rbp,%r15\n\timulq\t%r10,%rdx\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r15\n\tadoxq\t%r8,%rbp\n\tadcxq\t%rbp,%r8\n\timulq\t%r11,%rdx\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r8\n\tadoxq\t%r9,%rbp\n\tadcxq\t%rbp,%r9\n\timulq\t%r12,%rdx\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0(%rbx),%rax,%r
bp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r9\n\tadoxq\t%r10,%rbp\n\tadcxq\t%rbp,%r10\n\timulq\t%r13,%rdx\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r10\n\tadoxq\t%r11,%rbp\n\tadcxq\t%rbp,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__mulx_by_1_mont_384,.-__mulx_by_1_mont_384\n\n.type\t__redx_tail_mont_384,@function\n.align\t32\n__redx_tail_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rd
x\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\t__redx_tail_mont_384,.-__redx_tail_mont_384\n\n.globl\tsgn0x_pty_mont_384\n.hidden\tsgn0x_pty_mont_384\n.type\tsgn0x_pty_mont_384,@function\n.align\t32\nsgn0x_pty_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsgn0_pty_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\txorq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0x_pty_mont_384,.-sgn0x_pty_mont_384\n\n.globl\tsgn0x_pty_mont_384x\n.hidden\tsgn0x_pty_mont_384x\n.type\tsgn0x_pty_mont_384x,@function\n.align\t32\nsgn0x_pty_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsgn0_pty_mont_384x$1:\n\tpushq\t%rbp\n.cfi_adju
st_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tle
aq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsgn0x_pty_mont_384x,.-sgn0x_pty_mont_384x\n.globl\tmulx_mont_384\n.hidden\tmulx_mont_384\n.type\tmulx_mont_384,@function\n.align\t32\nmulx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nmul_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\tmovq\t%r8,(%rsp)\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tmulx_mont_384,.-mulx_mont_384\n.type\t__mulx_mont_384,@function\n.align\t32\n__mulx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14
\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\txorq\t%r15,%r15\n\n\tmovq\t%r8,16(%rsp)\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\tadoxq\t%rax,%r15\n\tadoxq\t%rax,%rax\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r8,%r14\n\tadoxq\t%r8,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r9,16(%rsp)\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rdx\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r8,%rax\n\tadoxq\t%r8,%r8\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmu
lxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r15\n\tadoxq\t%r9,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r10,16(%rsp)\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\tadoxq\t%r9,%r8\n\tadoxq\t%r9,%r9\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r10,%rax\n\tadoxq\t%r10,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r11,16(%rsp)\n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%r
bp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\tadoxq\t%r10,%r9\n\tadoxq\t%r10,%r10\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r11,%r8\n\tadoxq\t%r11,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tmovq\t%r12,16(%rsp)\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r12,%r9\n\tadoxq\t%r12,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%
rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tmovq\t%r15,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tmovq\t%rax,%rsi\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tmovq\t%r14,%rdx\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tleaq\t128(%rcx),%rcx\n\tmovq\t%r8,%r12\n\tadcq\t$0,%r11\n\n\n\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r9,%rdi\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r8\n\tsbbq\t32(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t40(%rcx),%r10\n\tsbbq\t$0,%r11\n\n\tcmovncq\t%r14,%rdx\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rsi,%rax\n\tcmovncq\t%r8,%r12\n\tmovq\t%rdx,0(%rbx)\n\tcmovncq\t%r9,%rdi\n\tmovq\t%r15,8(%rbx)\n\tcmovncq\t%r10,%rbp\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\t__mulx_mont_384,.-__mulx_mont_384\n.globl\tsqrx_mont_384\n.hidden\tsqrx_mont_384\n.type\tsqrx_mont_384,@function\n.align\t32\nsqrx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t%rcx,%r8\n\tleaq\t-128(%rdx),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tm
ovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tleaq\t(%rsi),%rbx\n\tmovq\t%r8,(%rsp)\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_mont_384,.-sqrx_mont_384\n\n.globl\tsqrx_n_mul_mont_384\n.hidden\tsqrx_n_mul_mont_384\n.type\tsqrx_n_mul_mont_384,@function\n.align\t32\nsqrx_n_mul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_n_mul_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-40(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*5\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\n.Loop_sqrx_384:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\t.Loop_sqrx_384\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_
384\n\n\tmovq\t40(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t48(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t64(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t72(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t80(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t88(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_n_mul_mont_384,.-sqrx_n_mul_mont_384\n\n.globl\tsqrx_n_mul_mont_383\n.hidden\tsqrx_n_mul_mont_383\n.type\tsqrx_n_mul_mont_383,@function\n.align\t32\nsqrx_n_mul_mont_383:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_n_mul_mont_383$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-40(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*5\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\tleaq\t-128(%rcx),%rcx\n\n.Loop_sqrx_383:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\t.Loop_sqrx_383\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t40(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t48(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t64(%rsp),%r12\n.cfi_restore\t%r12\n\tmov
q\t72(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t80(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t88(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_n_mul_mont_383,.-sqrx_n_mul_mont_383\n.type\t__mulx_mont_383_nonred,@function\n.align\t32\n__mulx_mont_383_nonred:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\tmovq\t%r8,%rax\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%r15,%rbp\n\tadoxq\t%rbp,%r15\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%rax,%r14\n\tadoxq\t%rax,%r15\n\tadcxq\t%rax,%r15\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp
,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rdx\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rax,%rbp\n\tadoxq\t%rbp,%rax\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%r8,%rbp\n\tadoxq\t%rbp,%r8\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r11,%r10\
n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%r9,%rbp\n\tadoxq\t%rbp,%r9\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%r10,%rbp\n\tadoxq\t%rbp,%r10\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadox
q\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r14,%rdx\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tadcq\t$0,%r10\n\tmovq\t%r8,%r12\n\n\tmovq\t%r14,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r9,%rdi\n\tmovq\t%r8,24(%rbx)\n\tmovq\t%r9,32(%rbx)\n\tmovq\t%r10,40(%rbx)\n\tmovq\t%r10,%rbp\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\t__mulx_mont_383_nonred,.-__mulx_mont_383_nonred\n.globl\tsqrx_mont_382x\n.hidden\tsqrx_mont_382x\n.type\tsqrx_mont_382x,@function\n.align\t32\nsqrx_mont_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nsqr_mont_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8
,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tadcq\t%r12,%r12\n\tadcq\t%rdi,%rdi\n\tadcq\t%rbp,%rbp\n\n\tmovq\t%rdx,48(%rbx)\n\tmovq\t%r15,56(%rbx)\n\tmovq\t%rax,64(%rbx)\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32-128(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\n\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\tmovq\t32+96(%rsp),%r14\n\tleaq\t128(%rcx),%rcx\n\tmovq\t32+0(%rsp),%r8\n\tandq\t%r14,%r8\n\tmovq\t32+8(%rsp),%r9\n\tandq\t%r14,%r9\n\tmovq\t32+16(%rsp),%r10\n\tandq\t%r14,%r10\n\tmovq\t32+24(%rsp),%r11\n\tandq\t%r14,%r11\n\tmovq\t32+32(%rsp),%r13\n\tandq\t%r14,%r13\n\tandq\t32+40(%rsp),%r14\n\n\tsubq\t%r8,%rdx\n\tmovq\t0(%rcx),%r8\n\tsbbq\t%r9,%r15\n\tmovq\t8(%rcx),%r9\n\tsbbq\t%r10,%rax\n\tmovq\t16(%rcx),%r10\n
\tsbbq\t%r11,%r12\n\tmovq\t24(%rcx),%r11\n\tsbbq\t%r13,%rdi\n\tmovq\t32(%rcx),%r13\n\tsbbq\t%r14,%rbp\n\tsbbq\t%r14,%r14\n\n\tandq\t%r14,%r8\n\tandq\t%r14,%r9\n\tandq\t%r14,%r10\n\tandq\t%r14,%r11\n\tandq\t%r14,%r13\n\tandq\t40(%rcx),%r14\n\n\taddq\t%r8,%rdx\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%r12\n\tadcq\t%r13,%rdi\n\tadcq\t%r14,%rbp\n\n\tmovq\t%rdx,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tsqrx_mont_382x,.-sqrx_mont_382x\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/sha256-armv8.S",
    "content": "#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n//\n// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n//\n// ====================================================================\n// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n// project.\n// ====================================================================\n//\n// sha256_block procedure for ARMv8.\n//\n// This module is stripped of scalar code paths, with rationale that all\n// known processors are NEON-capable.\n//\n// See original module at CRYPTOGAMS for further details.\n\n.comm\t__blst_platform_cap,4\n.text\n\n.align\t6\n.type\t.LK256,%object\n.LK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n.long\t0\t//terminator\n.size\t.LK256,.-.LK256\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.align\t2\n.align\t2\n.globl\tblst_s
ha256_block_armv8\n.hidden\tblst_sha256_block_armv8\n.type\tblst_sha256_block_armv8,%function\n.align\t6\nblst_sha256_block_armv8:\n\thint\t#34\n.Lv8_entry:\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tld1\t{v0.4s,v1.4s},[x0]\n\tadr\tx3,.LK256\n\n.Loop_hw:\n\tld1\t{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64\n\tsub\tx2,x2,#1\n\tld1\t{v16.4s},[x3],#16\n\trev32\tv4.16b,v4.16b\n\trev32\tv5.16b,v5.16b\n\trev32\tv6.16b,v6.16b\n\trev32\tv7.16b,v7.16b\n\torr\tv18.16b,v0.16b,v0.16b\t\t// offload\n\torr\tv19.16b,v1.16b,v1.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 
v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.inst\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.inst\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.inst\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.inst\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.inst\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.inst\t0x5e0660a7\t//sha256su1 
v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tld1\t{v17.4s},[x3]\n\tadd\tv16.4s,v16.4s,v6.4s\n\tsub\tx3,x3,#64*4-16\t// rewind\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.inst\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tadd\tv17.4s,v17.4s,v7.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.inst\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.inst\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tadd\tv0.4s,v0.4s,v18.4s\n\tadd\tv1.4s,v1.4s,v19.4s\n\n\tcbnz\tx2,.Loop_hw\n\n\tst1\t{v0.4s,v1.4s},[x0]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\tret\n.size\tblst_sha256_block_armv8,.-blst_sha256_block_armv8\n.globl\tblst_sha256_block_data_order\n.hidden\tblst_sha256_block_data_order\n.type\tblst_sha256_block_data_order,%function\n.align\t4\nblst_sha256_block_data_order:\n\thint\t#34\n\tadrp\tx16,__blst_platform_cap\n\tldr\tw16,[x16,#:lo12:__blst_platform_cap]\n\ttst\tw16,#1\n\tb.ne\t.Lv8_entry\n\n\tstp\tx29, x30, [sp, #-2*__SIZEOF_POINTER__]!\n\tmov\tx29, sp\n\tsub\tsp,sp,#16*4\n\n\tadr\tx16,.LK256\n\tadd\tx2,x1,x2,lsl#6\t// len to point at the end of inp\n\n\tld1\t{v0.16b},[x1], #16\n\tld1\t{v1.16b},[x1], #16\n\tld1\t{v2.16b},[x1], #16\n\tld1\t{v3.16b},[x1], #16\n\tld1\t{v4.4s},[x16], #16\n\tld1\t{v5.4s},[x16], #16\n\tld1\t{v6.4s},[x16], #16\n\tld1\t{v7.4s},[x16], #16\n\trev32\tv0.16b,v0.16b\t\t// yes, even on\n\trev32\tv1.16b,v1.16b\t\t// big-endian\n\trev32\tv2.16b,v2.16b\n\trev32\tv3.16b,v3.16b\n\tmov\tx17,sp\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tv5.4s,v5.4s,v1.4s\n\tadd\tv6.4s,v6.4s,v2.4s\n\tst1\t{v4.4s,v5.4s},[x17], 
#32\n\tadd\tv7.4s,v7.4s,v3.4s\n\tst1\t{v6.4s,v7.4s},[x17]\n\tsub\tx17,x17,#32\n\n\tldp\tw3,w4,[x0]\n\tldp\tw5,w6,[x0,#8]\n\tldp\tw7,w8,[x0,#16]\n\tldp\tw9,w10,[x0,#24]\n\tldr\tw12,[sp,#0]\n\tmov\tw13,wzr\n\teor\tw14,w4,w5\n\tmov\tw15,wzr\n\tb\t.L_00_48\n\n.align\t4\n.L_00_48:\n\text\tv4.16b,v0.16b,v1.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v2.16b,v3.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v3.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv0.4s,v0.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv0.4s,v0.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv0.4s,v0.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v0.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v0.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v0.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v0.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w
8,w11\n\tsli\tv17.4s,v0.4s,#13\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv0.4s,v0.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v1.16b,v2.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v3.16b,v0.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v0.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv1.4s,v1.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv1.4s,v1.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n
\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv1.4s,v1.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\tushr\tv18.4s,v1.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v1.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v1.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v1.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v1.4s,#13\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv1.4s,v1.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\text\tv4.16b,v2.16b,v3.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v0.16b,v1.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v1.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv2.4s,v2.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv2.4s,v2.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv2.4s,v2.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v2.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v2.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v2.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v2.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tsli\tv17.4s,v2.4s,#13\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], 
#16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv2.4s,v2.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v3.16b,v0.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v1.16b,v2.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v2.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv3.4s,v3.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv3.4s,v3.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv3.4s,v3.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw
5,w5,w13\n\tushr\tv18.4s,v3.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v3.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v3.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v3.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v3.4s,#13\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv3.4s,v3.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[x16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tcmp\tw12,#0\t\t\t\t// check for K256 terminator\n\tldr\tw12,[sp,#0]\n\tsub\tx17,x17,#64\n\tbne\t.L_00_48\n\n\tsub\tx16,x16,#256\n\tcmp\tx1,x2\n\tmov\tx17, #-64\n\tcsel\tx17, x17, xzr, 
eq\n\tadd\tx1,x1,x17\n\tmov\tx17,sp\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v0.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv0.16b,v0.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v1.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv1.16b,v1.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v2.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv2.16b,v2.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v3.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv3.16b,v3.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tadd\tw3,w3,w15\t\t\t// h+=Sigma0(a) from the past\n\tldp\tw11,w12,[x0,#0]\n\tadd\tw3,w3,w13\t\t\t// h+=Maj(a,b,c) from the past\n\tldp\tw13,w14,[x0,#8]\n\tadd\tw3,w3,w11\t\t\t// 
accumulate\n\tadd\tw4,w4,w12\n\tldp\tw11,w12,[x0,#16]\n\tadd\tw5,w5,w13\n\tadd\tw6,w6,w14\n\tldp\tw13,w14,[x0,#24]\n\tadd\tw7,w7,w11\n\tadd\tw8,w8,w12\n\tldr\tw12,[sp,#0]\n\tstp\tw3,w4,[x0,#0]\n\tadd\tw9,w9,w13\n\tmov\tw13,wzr\n\tstp\tw5,w6,[x0,#8]\n\tadd\tw10,w10,w14\n\tstp\tw7,w8,[x0,#16]\n\teor\tw14,w4,w5\n\tstp\tw9,w10,[x0,#24]\n\tmov\tw15,wzr\n\tmov\tx17,sp\n\tb.ne\t.L_00_48\n\n\tldr\tx29,[x29]\n\tadd\tsp,sp,#16*4+2*__SIZEOF_POINTER__\n\tret\n.size\tblst_sha256_block_data_order,.-blst_sha256_block_data_order\n.globl\tblst_sha256_emit\n.hidden\tblst_sha256_emit\n.type\tblst_sha256_emit,%function\n.align\t4\nblst_sha256_emit:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n#ifndef\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n#endif\n\tstr\tw4,[x0,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[x0,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[x0,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[x0,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[x0,#0]\n\tstr\tw5,[x0,#8]\n\tstr\tw6,[x0,#16]\n\tstr\tw7,[x0,#24]\n\tret\n.size\tblst_sha256_emit,.-blst_sha256_emit\n\n.globl\tblst_sha256_bcopy\n.hidden\tblst_sha256_bcopy\n.type\tblst_sha256_bcopy,%function\n.align\t4\nblst_sha256_bcopy:\n\thint\t#34\n.Loop_bcopy:\n\tldrb\tw3,[x1],#1\n\tsub\tx2,x2,#1\n\tstrb\tw3,[x0],#1\n\tcbnz\tx2,.Loop_bcopy\n\tret\n.size\tblst_sha256_bcopy,.-blst_sha256_bcopy\n\n.globl\tblst_sha256_hcopy\n.hidden\tblst_sha256_hcopy\n.type\tblst_sha256_hcopy,%function\n.align\t4\nblst_sha256_hcopy:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tstp\tx4,x5,[x0]\n\tstp\tx6,x7,[x0,#16]\n\tret\n.size\tblst_sha256_hcopy,.-blst_sha256_hcopy\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",@progbits\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n"
  },
  {
    "path": "build/elf/sha256-portable-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n.text\t\n\n.globl\tblst_sha256_block_data_order\n.type\tblst_sha256_block_data_order,@function\n.align\t16\nblst_sha256_block_data_order:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifdef __BLST_PORTABLE__\n\ttestl\t$2,__blst_platform_cap(%rip)\n\tjnz\t.Lblst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_offset\t%r15,-56\n\tshlq\t$4,%rdx\n\tsubq\t$64+24,%rsp\n\n.cfi_def_cfa\t%rsp,144\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,64+0(%rsp)\n\tmovq\t%rsi,64+8(%rsp)\n\tmovq\t%rdx,64+16(%rsp)\n\n\tmovl\t0(%rdi),%eax\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\tjmp\t.Lloop\n\n.align\t16\n.Lloop:\n\tmovl\t%ebx,%edi\n\tleaq\tK256(%rip),%rbp\n\txorl\t%ecx,%edi\n\tmovl\t0(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t0(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t4(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t
$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t4(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t8(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t8(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t12(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t12(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t16(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\t
addl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t16(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t20(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t20(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r14d,%ecx\n\tmovl\t24(%rsi),%r12d\n\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t24(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t28(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t28(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl
\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r14d,%eax\n\tmovl\t32(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t32(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t36(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t36(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t40(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t40(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl
\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t44(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t44(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t48(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t48(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t52(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t52(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tadd
l\t%r14d,%ecx\n\tmovl\t56(%rsi),%r12d\n\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t56(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t60(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t60(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tjmp\t.Lrounds_16_xx\n.align\t16\n.Lrounds_16_xx:\n\tmovl\t4(%rsp),%r13d\n\tmovl\t56(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t36(%rsp),%r12d\n\n\taddl\t0(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,
%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t64(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t8(%rsp),%r13d\n\tmovl\t60(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t40(%rsp),%r12d\n\n\taddl\t4(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t68(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t12(%rsp),%r13d\n\tmovl\t0(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t44(%rsp),%r12d\n\n\taddl\t8(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t72(%rbp),%r12d\n\txorl\t%r10d,%r1
4d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t16(%rsp),%r13d\n\tmovl\t4(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t48(%rsp),%r12d\n\n\taddl\t12(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t76(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t20(%rsp),%r13d\n\tmovl\t8(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t52(%rsp),%r12d\n\n\taddl\t16(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t80(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r1
4d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t24(%rsp),%r13d\n\tmovl\t12(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t56(%rsp),%r12d\n\n\taddl\t20(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t84(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t28(%rsp),%r13d\n\tmovl\t16(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t60(%rsp),%r12d\n\n\taddl\t24(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t88(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\tmo
vl\t32(%rsp),%r13d\n\tmovl\t20(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t0(%rsp),%r12d\n\n\taddl\t28(%rsp),%r12d\n\tmovl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t92(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tmovl\t36(%rsp),%r13d\n\tmovl\t24(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t4(%rsp),%r12d\n\n\taddl\t32(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t96(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t40(%rsp),%r13d\n\tmovl\t28(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%
r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t8(%rsp),%r12d\n\n\taddl\t36(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t100(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t44(%rsp),%r13d\n\tmovl\t32(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t12(%rsp),%r12d\n\n\taddl\t40(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t104(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t48(%rsp),%r13d\n\tmovl\t36(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r
12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t16(%rsp),%r12d\n\n\taddl\t44(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t108(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t52(%rsp),%r13d\n\tmovl\t40(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t20(%rsp),%r12d\n\n\taddl\t48(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t112(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t56(%rsp),%r13d\n\tmovl\t44(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,
%r12d\n\txorl\t%r14d,%edi\n\taddl\t24(%rsp),%r12d\n\n\taddl\t52(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t116(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t60(%rsp),%r13d\n\tmovl\t48(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t28(%rsp),%r12d\n\n\taddl\t56(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t120(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\tmovl\t0(%rsp),%r13d\n\tmovl\t52(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t32(%rsp),%r12d\n\n\taddl\t60(%rsp),%r12d\n\tmovl\t%r9d,
%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t124(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tleaq\t64(%rbp),%rbp\n\tcmpb\t$0x19,3(%rbp)\n\tjnz\t.Lrounds_16_xx\n\n\tmovq\t64+0(%rsp),%rdi\n\taddl\t%r14d,%eax\n\tleaq\t64(%rsi),%rsi\n\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tcmpq\t64+16(%rsp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\t.Lloop\n\n\tleaq\t64+24+48(%rsp),%r11\n.cfi_def_cfa\t%r11,8\n\tmovq\t64+24(%rsp),%r15\n\tmovq\t-40(%r11),%r14\n\tmovq\t-32(%r11),%r13\n\tmovq\t-24(%r11),%r12\n\tmovq\t-16(%r11),%rbx\n\tmovq\t-8(%r11),%rbp\n.cfi_restore\t%r12\n.cfi_restore\t%r13\n.cfi_restore\t%r14\n.cfi_restore\t%r15\n.cfi_restore\t%rbp\n.cfi_restore\t%rbx\n\tleaq\t(%r11),%rsp\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tblst_sha256_block_data_order,.-blst_sha256_block_data_order\n\n#ifndef 
__BLST_PORTABLE__\n.section\t.rodata\n.align\t64\n.type\tK256,@object\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.globl\tblst_sha256_emit\n.hidden\tblst_sha256_emit\n.type\tblst_sha256_emit,@function\n.align\t16\nblst_sha256_emit:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rsi),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rdi)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rdi)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rdi)\n\tshrq\t$32,%r8\n\tmovl\t%r11d,28(%rdi)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rdi)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rdi)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rdi)\n\tmovl\t%r11d,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_emit,.-blst_sha256_emit\n\n.globl\tblst_sha256_bcopy\n.hidden\tblst_sha256_bcopy\n.type\tblst_sha256_bcopy,@function\n.align\t16\nblst_sha256_bcopy:\n.cfi_startproc\
n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tsubq\t%rsi,%rdi\n.Loop_bcopy:\n\tmovzbl\t(%rsi),%eax\n\tleaq\t1(%rsi),%rsi\n\tmovb\t%al,-1(%rdi,%rsi,1)\n\tdecq\t%rdx\n\tjnz\t.Loop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_bcopy,.-blst_sha256_bcopy\n\n.globl\tblst_sha256_hcopy\n.hidden\tblst_sha256_hcopy\n.type\tblst_sha256_hcopy,@function\n.align\t16\nblst_sha256_hcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_hcopy,.-blst_sha256_hcopy\n#endif\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/elf/sha256-x86_64.s",
    "content": ".comm\t__blst_platform_cap,4\n\n.section\t.rodata\n.align\t64\n.type\tK256,@object\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.long\t0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f\n.long\t0x03020100,0x0b0a0908,0xffffffff,0xffffffff\n.long\t0xffffffff,0xffffffff,0x03020100,0x0b0a0908\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.text\t\n.globl\tblst_sha256_block_data_order_shaext\n.hidden\tblst_sha256_block_data_order_shaext\n.type\tblst_sha256_block_data_order_shaext,@function\n.align\t64\nblst_sha256_block_data_order_shaext:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n.Lblst_sha256_block_data_order$2:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tleaq\tK256+128(%rip),%rcx\n\tmovdqu\t(%rdi),%xmm1\n\tmovdqu\t16(%rdi),%xmm2\n\tmovdqa\t256-128(%rcx),%xmm7\n\n\tpshufd\t$0x1b,%xmm1,%xmm0\n\tpshufd\t$0xb1,%xmm1,%xmm1\n\tpshufd\t$0x1b,%xmm2,%xmm2\n\tmovdqa\t%xmm7,%xmm8\n.byte\t102,15,58,15,202,8\
n\tpunpcklqdq\t%xmm0,%xmm2\n\tjmp\t.Loop_shaext\n\n.align\t16\n.Loop_shaext:\n\tmovdqu\t(%rsi),%xmm3\n\tmovdqu\t16(%rsi),%xmm4\n\tmovdqu\t32(%rsi),%xmm5\n.byte\t102,15,56,0,223\n\tmovdqu\t48(%rsi),%xmm6\n\n\tmovdqa\t0-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t102,15,56,0,231\n\tmovdqa\t%xmm2,%xmm10\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tnop\n\tmovdqa\t%xmm1,%xmm9\n.byte\t15,56,203,202\n\n\tmovdqa\t16-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t102,15,56,0,239\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tleaq\t64(%rsi),%rsi\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\n\tmovdqa\t32-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t102,15,56,0,247\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\n\tmovdqa\t48-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t64-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t80-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t96-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t112-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.
byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t128-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t144-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t160-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t176-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t192-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t208-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n.byte\t15,56,203,202\n\tpaddd\t%xmm7,%xmm6\n\n\tmovdqa\t224-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n.byte\t15,56,205,245\n\tmovdqa\t%xmm8,%xmm7\n.byte\t15,56,203,202\n\n\tmovdqa\t240-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n\tnop\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tdecq\t%rdx\n\tnop\n.byte\t15,56,203,202\n\n\tpaddd\t%xmm10,%xmm2\n\tpaddd\t%xmm9,%xmm1\n\tjnz\t.Loop_shaext\n\n\tpshufd\t$0xb1,%xmm2,%xmm2\n\tpshufd
\t$0x1b,%xmm1,%xmm7\n\tpshufd\t$0xb1,%xmm1,%xmm1\n\tpunpckhqdq\t%xmm2,%xmm1\n.byte\t102,15,58,15,215,8\n\n\tmovdqu\t%xmm1,(%rdi)\n\tmovdqu\t%xmm2,16(%rdi)\n.cfi_def_cfa_register\t%rsp\n\tpopq\t%rbp\n.cfi_adjust_cfa_offset\t-8\n.cfi_restore\t%rbp\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tblst_sha256_block_data_order_shaext,.-blst_sha256_block_data_order_shaext\n.globl\tblst_sha256_block_data_order\n.hidden\tblst_sha256_block_data_order\n.type\tblst_sha256_block_data_order,@function\n.align\t64\nblst_sha256_block_data_order:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifndef\t__SGX_LVI_HARDENING__\n\ttestl\t$2,__blst_platform_cap(%rip)\n\tjnz\t.Lblst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_offset\t%r15,-56\n\tshlq\t$4,%rdx\n\tsubq\t$24,%rsp\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,-64(%rbp)\n\n\tmovq\t%rdx,-48(%rbp)\n\n\n\tleaq\t-64(%rsp),%rsp\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovl\t0(%rdi),%eax\n\tandq\t$-64,%rsp\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\n\n\tjmp\t.Lloop_ssse3\n.align\t16\n.Lloop_ssse3:\n\tmovdqa\tK256+256(%rip),%xmm7\n\tmovq\t%rsi,-56(%rbp)\n\tmovdqu\t0(%rsi),%xmm0\n\tmovdqu\t16(%rsi),%xmm1\n\tmovdqu\t32(%rsi),%xmm2\n.byte\t102,15,56,0,199\n\tmovdqu\t48(%rsi),%xmm3\n\tleaq\tK256(%rip),%rsi\n.byte\t102,15,56,0,207\n\tmovdqa\t0(%rsi),%xmm4\n\tmovdqa\t16(%rsi),%xmm5\n.byte\t102,15,56,0,215\n\tpaddd\t%xmm0,%xmm4\n\tmovdqa\t32(%rsi),%xmm6\n.byte\t102,15,56,0,223\n\tmovdqa\t48(%rsi),%xmm7\n\tpaddd\t%xmm1,%xmm5\n\tpaddd\t%xmm2,%xmm6\n\t
paddd\t%xmm3,%xmm7\n\tmovdqa\t%xmm4,0(%rsp)\n\tmovl\t%eax,%r14d\n\tmovdqa\t%xmm5,16(%rsp)\n\tmovl\t%ebx,%edi\n\tmovdqa\t%xmm6,32(%rsp)\n\txorl\t%ecx,%edi\n\tmovdqa\t%xmm7,48(%rsp)\n\tmovl\t%r8d,%r13d\n\tjmp\t.Lssse3_00_47\n\n.align\t16\n.Lssse3_00_47:\n\tsubq\t$-64,%rsi\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm1,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm3,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,224,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,250,4\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm3,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ec
x,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm0\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm0,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t0(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm0,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,0(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm2,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm0,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,225,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,251,4\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm0,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%x
mm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm1\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm1,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t16(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpadd
d\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm1,%xmm6\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,16(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm3,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm1,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,226,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,248,4\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm1,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq
\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm2\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm2,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t32(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm2,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,32(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm0,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm2,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,227,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,249,4\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm2,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\
txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm3\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm3,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t48(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm3,%xmm6\n\tmovl\t%r8
d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,48(%rsp)\n\tcmpb\t$0,67(%rsi)\n\tjne\t.Lssse3_00_47\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15
d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n
\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tmovl\t%r9d,
%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\
txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovq\t-64(%rbp),%rdi\n\tmovl\t%r14d,%eax\n\tmovq\t-56(%rbp),%rsi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tleaq\t64(%rsi),%rsi\n\tcmpq\t-48(%rbp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\t.Lloop_ssse3\n\n\txorps\t%xmm0,%xmm0\n\tmovaps\t%xmm0,0(%rsp)\n\tmovaps\t%xmm0,16(%rsp)\n\tmovaps\t%xmm0,32(%rsp)\n\tmovaps\t%xmm0,48(%rsp)\n\tmovq\t-40(%rbp),%r15\n\tmovq\t-32(%rbp),%r14\n\tmovq\t-24(%rbp),%r13\n\tmovq\t-16(%rbp),%r12\n\tmovq\t-8(%rbp),%rbx\n\tmovq\t%rbp,%rsp\n.cfi_def_cfa_register\t%rsp\n\tpopq\t%rbp\n.cfi_adjust_cfa_offset\t-8\n.cfi_restore\t%rbp\n.cfi_restore\t%r12\n.cfi_restore\t%r13\n.cfi_restore\t%r14\n.cfi_restore\t%r15\n.cfi_restore\t%rbx\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n.size\tblst_sha256_block_data_order,.-blst_sha256_block_data_order\n.globl\tblst_sha256_emit\n.hidden\tblst_sha256_emit\n.type\tblst_sha256_emit,@function\n.align\t16\nblst_sha256_emit:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rsi),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rdi)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rdi)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rdi)\n\tshrq\t$32,%r8\n\
tmovl\t%r11d,28(%rdi)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rdi)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rdi)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rdi)\n\tmovl\t%r11d,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_emit,.-blst_sha256_emit\n\n.globl\tblst_sha256_bcopy\n.hidden\tblst_sha256_bcopy\n.type\tblst_sha256_bcopy,@function\n.align\t16\nblst_sha256_bcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tsubq\t%rsi,%rdi\n.Loop_bcopy:\n\tmovzbl\t(%rsi),%eax\n\tleaq\t1(%rsi),%rsi\n\tmovb\t%al,-1(%rdi,%rsi,1)\n\tdecq\t%rdx\n\tjnz\t.Loop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_bcopy,.-blst_sha256_bcopy\n\n.globl\tblst_sha256_hcopy\n.hidden\tblst_sha256_hcopy\n.type\tblst_sha256_hcopy,@function\n.align\t16\nblst_sha256_hcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n.size\tblst_sha256_hcopy,.-blst_sha256_hcopy\n\n.section\t.note.GNU-stack,\"\",@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t8\n2:\n#endif\n"
  },
  {
    "path": "build/mach-o/add_mod_256-armv8.S",
    "content": ".text\n\n.globl\t_add_mod_256\n.private_extern\t_add_mod_256\n\n.align\t5\n_add_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tadds\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tadcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tadcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_mul_by_3_mod_256\n.private_extern\t_mul_by_3_mod_256\n\n.align\t5\n_mul_by_3_mod_256:\n\thint\t#34\n\tldp\tx12,x13,[x1]\n\tldp\tx14,x15,[x1,#16]\n\n\tadds\tx8,x12,x12\n\tldp\tx4,x5,[x2]\n\tadcs\tx9,x13,x13\n\tldp\tx6,x7,[x2,#16]\n\tadcs\tx10,x14,x14\n\tadcs\tx11,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\tadds\tx8,x8,x12\n\tadcs\tx9,x9,x13\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x2,lo\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_lshift_mod_256\n.private_extern\t_lshift_mod_256\n\n.align\t5\n_lshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\nLoop_lshift_mod_256:\n\tadds\tx8,x8,x8\n\tsub\tx2,x2,#1\n\tadcs\tx9,x9,x9\n\tadcs\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx12,x8,x4\n\tsbcs\tx13,x9,x5\n\tsbcs\tx14,x10,x6\n\tsbcs\tx15,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x12,lo\n\tcsel\tx9,x9,x13,lo\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\n\tcbnz\tx2,Loop_lshift_mod_256\n\n\tst
p\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_rshift_mod_256\n.private_extern\t_rshift_mod_256\n\n.align\t5\n_rshift_mod_256:\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\nLoop_rshift:\n\tadds\tx12,x8,x4\n\tsub\tx2,x2,#1\n\tadcs\tx13,x9,x5\n\tadcs\tx14,x10,x6\n\tadcs\tx15,x11,x7\n\tadc\tx3,xzr,xzr\n\ttst\tx8,#1\n\n\tcsel\tx12,x12,x8,ne\n\tcsel\tx13,x13,x9,ne\n\tcsel\tx14,x14,x10,ne\n\tcsel\tx15,x15,x11,ne\n\tcsel\tx3,x3,xzr,ne\n\n\textr\tx8,x13,x12,#1\n\textr\tx9,x14,x13,#1\n\textr\tx10,x15,x14,#1\n\textr\tx11,x3,x15,#1\n\n\tcbnz\tx2,Loop_rshift\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_cneg_mod_256\n.private_extern\t_cneg_mod_256\n\n.align\t5\n_cneg_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx4,x5,[x3]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx12,x4,x8\n\tldp\tx6,x7,[x3,#16]\n\torr\tx4,x8,x9\n\tsbcs\tx13,x5,x9\n\torr\tx5,x10,x11\n\tsbcs\tx14,x6,x10\n\torr\tx3,x4,x5\n\tsbc\tx15,x7,x11\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx8,x8,x12,eq\n\tcsel\tx9,x9,x13,eq\n\tcsel\tx10,x10,x14,eq\n\tstp\tx8,x9,[x0]\n\tcsel\tx11,x11,x15,eq\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_sub_mod_256\n.private_extern\t_sub_mod_256\n\n.align\t5\n_sub_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tstp\tx8,x9,[x0]\n\tadc\tx11,x11,x7\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\n\n.globl\t_check_mod_256\n.private_extern\t_check_mod_256\n\n.align\t5\n_check_mod_256:\n\tldp\tx8,x9,[x0]\n\tldp\tx10,x11,[x0,#16]\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tsubs\txzr,x8,x4\n\tsbcs\t
xzr,x9,x5\n\torr\tx8,x8,x9\n\tsbcs\txzr,x10,x6\n\torr\tx8,x8,x10\n\tsbcs\txzr,x11,x7\n\torr\tx8,x8,x11\n\tsbc\tx1,xzr,xzr\n\n\tcmp\tx8,#0\n\tmov\tx0,#1\n\tcsel\tx0,x0,xzr,ne\n\tand\tx0,x0,x1\n\n\tret\n\n\n.globl\t_add_n_check_mod_256\n.private_extern\t_add_n_check_mod_256\n\n.align\t5\n_add_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tadds\tx8,x8,x12\n\tldp\tx4,x5,[x3]\n\tadcs\tx9,x9,x13\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx8,x8,x16,lo\n\tcsel\tx9,x9,x17,lo\n\tcsel\tx10,x10,x1,lo\n\tcsel\tx11,x11,x2,lo\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n\n\n.globl\t_sub_n_check_mod_256\n.private_extern\t_sub_n_check_mod_256\n\n.align\t5\n_sub_n_check_mod_256:\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n#endif\n\n\tsubs\tx8,x8,x12\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tadc\tx11,x11,x7\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, 
x17\n\n#ifdef\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n#endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcsel\tx0, x17, xzr, ne\n\n\tret\n\n"
  },
  {
    "path": "build/mach-o/add_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\t_add_mod_256\n.private_extern\t_add_mod_256\n\n.p2align\t5\n_add_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\nL$oaded_a_add_mod_256:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_mul_by_3_mod_256\n.private_extern\t_mul_by_3_mod_256\n\n.p2align\t5\n_mul_by_3_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rsi,%rdx\n\tmovq\t24(%rsi),%r11\n\n\tcall\t__lshift_mod_256\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tjmp\tL$oaded_a_add_mod_256\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\
n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__lshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tmovq\t%r8,%rax\n\tadcq\t%r10,%r10\n\tmovq\t%r9,%rsi\n\tadcq\t%r11,%r11\n\tsbbq\t%r12,%r12\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tcmovcq\t%rbx,%r10\n\tcmovcq\t%rbp,%r11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.globl\t_lshift_mod_256\n.private_extern\t_lshift_mod_256\n\n.p2align\t5\n_lshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\nL$oop_lshift_mod_256:\n\tcall\t__lshift_mod_256\n\tdecl\t%edx\n\tjnz\tL$oop_lshift_mod_256\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_rshift_mod_256\n.private_extern\t_rshift_mod_256\n\n.p2align\t5\n_rshift_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_c
fa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rbp\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\nL$oop_rshift_mod_256:\n\tmovq\t%rbp,%r8\n\tandq\t$1,%rbp\n\tmovq\t0(%rcx),%rax\n\tnegq\t%rbp\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\n\tandq\t%rbp,%rax\n\tandq\t%rbp,%rsi\n\tandq\t%rbp,%rbx\n\tandq\t24(%rcx),%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tadcq\t%rbx,%r10\n\tadcq\t%rbp,%r11\n\tsbbq\t%rax,%rax\n\n\tshrq\t$1,%r8\n\tmovq\t%r9,%rbp\n\tshrq\t$1,%r9\n\tmovq\t%r10,%rbx\n\tshrq\t$1,%r10\n\tmovq\t%r11,%rsi\n\tshrq\t$1,%r11\n\n\tshlq\t$63,%rbp\n\tshlq\t$63,%rbx\n\torq\t%r8,%rbp\n\tshlq\t$63,%rsi\n\torq\t%rbx,%r9\n\tshlq\t$63,%rax\n\torq\t%rsi,%r10\n\torq\t%rax,%r11\n\n\tdecl\t%edx\n\tjnz\tL$oop_rshift_mod_256\n\n\tmovq\t%rbp,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_cneg_mod_256\n.private_extern\t_cneg_mod_256\n\n.p2align\t5\n_cneg_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r12\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%r12,%r8\n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%r12\n\torq\t%r10,%r12\n\torq\t%r11,%r12\n\tmovq\t$-1,%rbp\n\n\tmovq\t0(%rcx),%rax\n\tcmovnzq\t%rbp,%r12\n\tmovq\t8(%rcx),%rsi\n\tmovq\t16(%rcx),%rbx\n\tandq\t%r12,%rax\n\tmovq\t24(%rcx),%rbp\n\tandq\t%r12,%rsi\n\tandq\t%r12,%rbx\n\tandq\t%r12,%rbp\n\n\tsubq\t%r8,%rax\n\tsbbq\t%r9,%rsi\n\tsbbq\t%r10,%rbx\n\tsbbq\t%r11,%rbp\n\n\torq\t%rdx,%
rdx\n\n\tcmovzq\t%r8,%rax\n\tcmovzq\t%r9,%rsi\n\tmovq\t%rax,0(%rdi)\n\tcmovzq\t%r10,%rbx\n\tmovq\t%rsi,8(%rdi)\n\tcmovzq\t%r11,%rbp\n\tmovq\t%rbx,16(%rdi)\n\tmovq\t%rbp,24(%rdi)\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_sub_mod_256\n.private_extern\t_sub_mod_256\n\n.p2align\t5\n_sub_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_check_mod_256\n.private_extern\t_check_mod_256\n\n.p2align\t5\n_check_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%rax\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\n\tmovq\t%rax,%r8\n\torq\t%r9,%rax\n\torq\t%r10,%rax\n
\torq\t%r11,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t$1,%rdx\n\tcmpq\t$0,%rax\n\tcmovneq\t%rdx,%rax\n\tandq\t%rsi,%rax\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_add_n_check_mod_256\n.private_extern\t_add_n_check_mod_256\n\n.p2align\t5\n_add_n_check_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tmovq\t%r8,%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r9,%rsi\n\tadcq\t24(%rdx),%r11\n\tsbbq\t%rdx,%rdx\n\n\tmovq\t%r10,%rbx\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%rbp\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%rax,%r8\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_sub_n_check_mod_256\n.private_extern\t_sub_n_check_mod_256\n\n.p2align\t5\n_sub_n_check_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_
HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%rax\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%rsi\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbp\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rsi\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\n\taddq\t%rax,%r8\n\tadcq\t%rsi,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rbx,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbp,%r11\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\torq\t%r10,%r8\n\tmovq\t$1,%rax\n\tcmovzq\t%r8,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n"
  },
  {
    "path": "build/mach-o/add_mod_384-armv8.S",
    "content": ".text\n\n.globl\t_add_mod_384\n.private_extern\t_add_mod_384\n\n.align\t5\n_add_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__add_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n__add_mod_384_ab_are_loaded:\n\tadds\tx10,x10,x16\n\tadcs\tx11,x11,x17\n\tadcs\tx12,x12,x19\n\tadcs\tx13,x13,x20\n\tadcs\tx14,x14,x21\n\tadcs\tx15,x15,x22\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n\n\n.globl\t_add_mod_384x\n.private_extern\t_add_mod_384x\n\n.align\t5\n_add_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\
n\n\n.globl\t_rshift_mod_384\n.private_extern\t_rshift_mod_384\n\n.align\t5\n_rshift_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\nLoop_rshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__rshift_mod_384\n\tcbnz\tx2,Loop_rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__rshift_mod_384:\n\tsbfx\tx22,x10,#0,#1\n\tand\tx16,x22,x4\n\tand\tx17,x22,x5\n\tadds\tx10,x10,x16\n\tand\tx19,x22,x6\n\tadcs\tx11,x11,x17\n\tand\tx20,x22,x7\n\tadcs\tx12,x12,x19\n\tand\tx21,x22,x8\n\tadcs\tx13,x13,x20\n\tand\tx22,x22,x9\n\tadcs\tx14,x14,x21\n\textr\tx10,x11,x10,#1\t// a[0:5] >>= 
1\n\tadcs\tx15,x15,x22\n\textr\tx11,x12,x11,#1\n\tadc\tx22,xzr,xzr\n\textr\tx12,x13,x12,#1\n\textr\tx13,x14,x13,#1\n\textr\tx14,x15,x14,#1\n\textr\tx15,x22,x15,#1\n\tret\n\n\n.globl\t_div_by_2_mod_384\n.private_extern\t_div_by_2_mod_384\n\n.align\t5\n_div_by_2_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_lshift_mod_384\n.private_extern\t_lshift_mod_384\n\n.align\t5\n_lshift_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\nLoop_lshift_mod_384:\n\tsub\tx2,x2,#1\n\tbl\t__lshift_mod_384\n\tcbnz\tx2,Loop_lshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__lshift_mod_384:\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsel\tx10,x10,x16,lo\n\tcsel\tx11,x11,x17,lo\n\tcsel\tx12,x12,x
19,lo\n\tcsel\tx13,x13,x20,lo\n\tcsel\tx14,x14,x21,lo\n\tcsel\tx15,x15,x22,lo\n\n\tret\n\n\n.globl\t_mul_by_3_mod_384\n.private_extern\t_mul_by_3_mod_384\n\n.align\t5\n_mul_by_3_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_mul_by_8_mod_384\n.private_extern\t_mul_by_8_mod_384\n\n.align\t5\n_mul_by_8_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_mul_by_3_mod_384x\n.private_extern\t_mul_by_3_mod_384x\n\n.align\t5\n_mul_by_3_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]
\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1,#48]\n\tldp\tx19,x20,[x1,#64]\n\tldp\tx21,x22,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_mul_by_8_mod_384x\n.private_extern\t_mul_by_8_mod_384x\n\n.align\t5\n_mul_by_8_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_cneg_mod_384\n.private_extern\t_cneg_mod_384\n\n.align\t5\n_cneg_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx4,x5,[x3]\n\tldp\tx12,x13,[x1,#16]\n\tldp\t
x6,x7,[x3,#16]\n\n\tsubs\tx16,x4,x10\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx8,x9,[x3,#32]\n\torr\tx3,x10,x11\n\tsbcs\tx17,x5,x11\n\torr\tx3,x3,x12\n\tsbcs\tx19,x6,x12\n\torr\tx3,x3,x13\n\tsbcs\tx20,x7,x13\n\torr\tx3,x3,x14\n\tsbcs\tx21,x8,x14\n\torr\tx3,x3,x15\n\tsbc\tx22,x9,x15\n\n\tcmp\tx3,#0\n\tcsetm\tx3,ne\n\tands\tx2,x2,x3\n\n\tcsel\tx10,x10,x16,eq\n\tcsel\tx11,x11,x17,eq\n\tcsel\tx12,x12,x19,eq\n\tcsel\tx13,x13,x20,eq\n\tstp\tx10,x11,[x0]\n\tcsel\tx14,x14,x21,eq\n\tstp\tx12,x13,[x0,#16]\n\tcsel\tx15,x15,x22,eq\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sub_mod_384\n.private_extern\t_sub_mod_384\n\n.align\t5\n_sub_mod_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__sub_mod_384:\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n\tsubs\tx10,x10,x16\n\tsbcs\tx11,x11,x17\n\tsbcs\tx12,x12,x19\n\tsbcs\tx13,x13,x20\n\tsbcs\tx14,x14,x21\n\tsbcs\tx15,x15,x22\n\tsbc\tx3,xzr,xzr\n\n\tand\tx16,x4,x3\n\tand\tx17,x5,x3\n\tadds\tx10,x10,x16\n\tand\tx19,x6,x3\n\tadcs\tx11,x11,x17\n\tand\tx20,x7,x3\n\tadcs\tx12,x12,x19\n\tand\tx21,x8,x3\n\tadcs\tx13,x13,x20\n\tand\tx22,x9,x3\n\tadcs\tx14,x14,x21\n\tadc\tx15,x15,x22\n\n\tret\n\n\n.globl\t_sub_mod_384x\n.private_extern\t_sub_mod_384x\n\n.align\t5\n_sub_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n
\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_mul_by_1_plus_i_mod_384x\n.private_extern\t_mul_by_1_plus_i_mod_384x\n\n.align\t5\n_mul_by_1_plus_i_mod_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\tadd\tx2,x1,#48\n\n\tbl\t__sub_mod_384\t\t\t// a->re - a->im\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\t// a->re + 
a->im\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sgn0_pty_mod_384\n.private_extern\t_sgn0_pty_mod_384\n\n.align\t5\n_sgn0_pty_mod_384:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx0,x10,#1\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx3,x3,xzr\n\n\tmvn\tx3,x3\n\tand\tx3,x3,#2\n\torr\tx0,x0,x3\n\n\tret\n\n\n.globl\t_sgn0_pty_mod_384x\n.private_extern\t_sgn0_pty_mod_384x\n\n.align\t5\n_sgn0_pty_mod_384x:\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx2,x10,#1\n\torr\tx3,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx3,x3,x12\n\tadcs\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tldp\tx10,x11,[x0,#48]\n\tldp\tx12,x13,[x0,#64]\n\tldp\tx14,x15,[x0,#80]\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx2,x2,x16\n\n\tand\tx0,x10,#1\n\torr\tx1,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx1,x1,x12\n\tadcs\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13
,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx0,x0,x16\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t// pack sign and parity\n\n\tret\n\n.globl\t_vec_select_32\n.private_extern\t_vec_select_32\n\n.align\t5\n_vec_select_32:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d}, [x1]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [x2]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [x0]\n\tret\n\n.globl\t_vec_select_48\n.private_extern\t_vec_select_48\n\n.align\t5\n_vec_select_48:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\n.globl\t_vec_select_96\n.private_extern\t_vec_select_96\n\n.align\t5\n_vec_select_96:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\t_vec_select_192\n.private_extern\t_vec_select_192\n\n.align\t5\n_vec_select_192:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, 
[x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\t_vec_select_144\n.private_extern\t_vec_select_144\n\n.align\t5\n_vec_select_144:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\n.globl\t_vec_select_288\n.private_extern\t_vec_select_288\n\n.align\t5\n_vec_select_288:\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, 
[x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\n.globl\t_vec_prefetch\n.private_extern\t_vec_prefetch\n\n.align\t5\n_vec_prefetch:\n\thint\t#34\n\tadd\tx1, x1, x0\n\tsub\tx1, x1, #1\n\tmov\tx2, #64\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tcsel\tx2, xzr, x2, hi\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcsel\tx0, x1, x0, hi\n\tprfm\tpldl1keep, [x0]\n\tret\n\n.globl\t_vec_is_zero_16x\n.private_extern\t_vec_is_zero_16x\n\n.align\t5\n_vec_is_zero_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tlsr\tx1, x1, #4\n\tsub\tx1, x1, #1\n\tcbz\tx1, 
Loop_is_zero_done\n\nLoop_is_zero:\n\tld1\t{v1.2d}, [x0], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\tx1, x1, #1\n\tcbnz\tx1, Loop_is_zero\n\nLoop_is_zero_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n\n.globl\t_vec_is_equal_16x\n.private_extern\t_vec_is_equal_16x\n\n.align\t5\n_vec_is_equal_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tld1\t{v1.2d}, [x1], #16\n\tlsr\tx2, x2, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\nLoop_is_equal:\n\tsub\tx2, x2, #1\n\tcbz\tx2, Loop_is_equal_done\n\tld1\t{v1.2d}, [x0], #16\n\tld1\t{v2.2d}, [x1], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\tLoop_is_equal\n\tnop\n\nLoop_is_equal_done:\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n\n"
  },
  {
    "path": "build/mach-o/add_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\t_add_mod_384\n.private_extern\t_add_mod_384\n\n.p2align\t5\n_add_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__add_mod_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__add_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__add_mod_384_a_is_loaded:\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq
\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_add_mod_384x\n.private_extern\t_add_mod_384x\n\n.p2align\t5\n_add_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t24\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__add_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t24+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t24+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t24+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_rshift_mod_384\n.private_extern\t_rshift_mod_384\n\n.p2align\t5\n_rshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rd
i\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\nL$oop_rshift_mod_384:\n\tcall\t__rshift_mod_384\n\tdecl\t%edx\n\tjnz\tL$oop_rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__rshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rsi\n\tmovq\t0(%rcx),%r14\n\tandq\t%r8,%rsi\n\tmovq\t8(%rcx),%r15\n\tnegq\t%rsi\n\tmovq\t16(%rcx),%rax\n\tandq\t%rsi,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rsi,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%rbx\n\tadcq\t%r12,%rbp\n\tadcq\t%r13,%rsi\n\tsbbq\t%r13,%r13\n\n\tshrq\t$1,%r14\n\tmovq\t%r15,%r8\n\tshrq\t$1,%r15\n\tmovq\t%rax,%r9\n\tshrq\t$1,%rax\n\tmovq\t%rbx,%r10\n\tshrq\t$1,%rbx\n\tmovq\t%rbp,%r11\n\tshrq\t$1,%rbp\n\tmovq\t%rsi,%r12\n\tshrq\t$1,%rsi\n\tshlq\t$63,%r8\n\tshlq\t$63,%r9\n\torq\t%r14,%r8\n\tshlq\t$63,%r10\n\torq\t%r15,%r9\n\tshlq\t$63,%r11\n\torq\t%rax,%r10\n\tshlq\t$63,%r12\n\torq\t%rbx,%r11\n\tshlq\t$63,%r13\n\torq\t%rbp,%r12\n\torq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r14\n\tlfence\n\tjmpq\t*%r14\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_div_by_2_mod_384\n.private_extern\t_div_by_2_mod_384\n\
n.p2align\t5\n_div_by_2_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rdx,%rcx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tcall\t__rshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_lshift_mod_384\n.private_extern\t_lshift_mod_384\n\n.p2align\t5\n_lshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi)
,%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\nL$oop_lshift_mod_384:\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdi,%rdi\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdi\n\n\tmovq\t(%rsp),%rdi\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\tdecl\t%edx\n\tjnz\tL$oop_lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__lshift_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%r14\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%r15\n\tadcq\t%r12,%r12\n\tmovq\t%r10,%rax\n\tadcq\t%r13,%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tcmovcq\t%rbx,%r11\n\tcmovcq\t%rbp,%r12\n\tcmovcq\t%rsi,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,
0xc3\n#endif\n.cfi_endproc\n\n\n\n.globl\t_mul_by_3_mod_384\n.private_extern\t_mul_by_3_mod_384\n\n.p2align\t5\n_mul_by_3_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_mul_by_8_mod_384\n.private_extern\t_mul_by_8_mod_384\n\n.p2align\t5\n_mul_by_8_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__
SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_mul_by_3_mod_384x\n.private_extern\t_mul_by_3_mod_384x\n\n.p2align\t5\n_mul_by_3_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rdi),%rdi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%r8\n\tmovq\t56(%rsi),%r9\n\tmovq\t64(%rsi),%r10\n\tmovq\t72(%rsi),%r11\n\tmovq\t80(%rsi),%r12\n\tmovq\
t88(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\n\tmovq\t$48,%rdx\n\taddq\t(%rsp),%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_mul_by_8_mod_384x\n.private_extern\t_mul_by_8_mod_384x\n\n.p2align\t5\n_mul_by_8_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t%rdx,%rcx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t(%rsp),%rsi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmovq\t%r8,48+0(%rdi)\n\tmovq\t%r9,48+8(%rdi)\n\tmovq\t%r10,48+16(%rdi
)\n\tmovq\t%r11,48+24(%rdi)\n\tmovq\t%r12,48+32(%rdi)\n\tmovq\t%r13,48+40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_cneg_mod_384\n.private_extern\t_cneg_mod_384\n\n.p2align\t5\n_cneg_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdx\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t%rdx,%r8\n\tmovq\t24(%rsi),%r11\n\torq\t%r9,%rdx\n\tmovq\t32(%rsi),%r12\n\torq\t%r10,%rdx\n\tmovq\t40(%rsi),%r13\n\torq\t%r11,%rdx\n\tmovq\t$-1,%rsi\n\torq\t%r12,%rdx\n\torq\t%r13,%rdx\n\n\tmovq\t0(%rcx),%r14\n\tcmovnzq\t%rsi,%rdx\n\tmovq\t8(%rcx),%r15\n\tmovq\t16(%rcx),%rax\n\tandq\t%rdx,%r14\n\tmovq\t24(%rcx),%rbx\n\tandq\t%rdx,%r15\n\tmovq\t32(%rcx),%rbp\n\tandq\t%rdx,%rax\n\tmovq\t40(%rcx),%rsi\n\tandq\t%rdx,%rbx\n\tmovq\t0(%rsp),%rcx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\tsubq\t%r8,%r14\n\tsbbq\t%r9,%r15\n\tsbbq\t%r10,%rax\n\tsbbq\t%r11,%rbx\n\tsbbq\t%r12,%rbp\n\tsbbq\t%r13,%rsi\n\n\torq\t%rcx,%rcx\n\n\tcmovzq\t%r8,%r14\n\tcmovzq\t%r9,%r15\n\tcmovzq\t%r10,%rax\n\tmovq\t%r14,0(%rdi)\n\tcmovzq\t%r11,%rbx\n\tmovq\t%r15,8(%rdi)\n\tcmovzq\t%r12,%rbp\n\tmovq\t
%rax,16(%rdi)\n\tcmovzq\t%r13,%rsi\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rsi,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.globl\t_sub_mod_384\n.private_extern\t_sub_mod_384\n\n.p2align\t5\n_sub_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__sub_mod_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__sub_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbb
q\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_sub_mod_384x\n.private_extern\t_sub_mod_384x\n\n.p2align\t5\n_sub_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t24\n\n\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rdx),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__sub_mod_384\n\n\tmovq\t24+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t24+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t24+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t24+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_mul_by_1_plus_i_mod_384x\n.private_extern\t_mul_by_1_plus_i_mod
_384x\n\n.p2align\t5\n_mul_by_1_plus_i_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$56,%rsp\n.cfi_adjust_cfa_offset\t56\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rbx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rcx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\tmovq\t%rdi,48(%rsp)\n\tsbbq\t%rdi,%rdi\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rbx\n\tsbbq\t80(%rsi),%rcx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rsi,%rsi\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t0(%rdx),%r8\n\tmovq\t%r9,8(%rsp)\n\tmovq\t8(%rdx),%r9\n\tmovq\t%r10,16(%rsp)\n\tmovq\t16(%rdx),%r10\n\tmovq\t%r11,24(%rsp)\n\tmovq\t24(%rdx),%r11\n\tmovq\t%r12,32(%rsp)\n\tandq\t%rsi,%r8\n\tmovq\t32(%rdx),%r12\n\tmovq\t%r13,40(%rsp)\n\tandq\t%rsi,%r9\n\tmovq\t40(%rdx),%r13\n\tandq\t%rsi,%r10\n\tandq\t%rsi,%r11\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tmovq\t48(%rsp),%rsi\n\n\taddq\t%r8,%r14\n\tmovq\t0(%rsp),%r8\n\tadcq\t%r9,%r15\n\tmovq\t8(%rsp),%r9\n\tadcq\t%r10,%rax\n\tmovq\t16(%rsp),%r10\n\tadcq\t%r11,%rbx\n\tmovq\t24(%rsp),%r11\n\tadcq\t%r12,%rcx\n\tmovq\t32(%rsp),%r12\n\tadcq\t%r13,%rbp\n\tmovq\t40(%rsp),%r13\n\n\tmovq\t%r14,0(%rsi)\n\tmovq\t%r8,%r14\n\tmovq\t%r15,8(%rsi)\n\tmovq\t%rax,16(%rsi)\n\tmovq\t%r9,%r15\n\tmovq\t%rbx,24(%rsi)\n\tmovq\t%rcx,32(%rsi)\n\tmovq\t%r10,%rax\n\tm
ovq\t%rbp,40(%rsi)\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t%r11,%rbx\n\tsbbq\t8(%rdx),%r9\n\tsbbq\t16(%rdx),%r10\n\tmovq\t%r12,%rcx\n\tsbbq\t24(%rdx),%r11\n\tsbbq\t32(%rdx),%r12\n\tmovq\t%r13,%rbp\n\tsbbq\t40(%rdx),%r13\n\tsbbq\t$0,%rdi\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,48(%rsi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,56(%rsi)\n\tcmovcq\t%rcx,%r12\n\tmovq\t%r10,64(%rsi)\n\tcmovcq\t%rbp,%r13\n\tmovq\t%r11,72(%rsi)\n\tmovq\t%r12,80(%rsi)\n\tmovq\t%r13,88(%rsi)\n\n\tmovq\t56+0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t56+8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56+16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t56+24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56+32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t56+40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56+48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sgn0_pty_mod_384\n.private_extern\t_sgn0_pty_mod_384\n\n.p2align\t5\n_sgn0_pty_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%rcx\n\tmovq\t40(%rdi),%rdx\n\n\txorq\t%rax,%rax\n\tmovq\t%r8,%rdi\n\taddq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%r8\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%rdi\n\tandq\t$2,%rax\n\torq\t%rdi,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sgn0_pty_mod_384x\n.private_extern\t_sgn0_pty_mod_384x\n\n.p2align\t5\n_sgn0_pty_mod_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust
_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rdi),%r8\n\tmovq\t56(%rdi),%r9\n\tmovq\t64(%rdi),%r10\n\tmovq\t72(%rdi),%r11\n\tmovq\t80(%rdi),%rcx\n\tmovq\t88(%rdi),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\tleaq\t0(%rdi),%rax\n\txorq\t%rdi,%rdi\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r8,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%rbp\n\tandq\t$2,%rdi\n\torq\t%rbp,%rdi\n\n\tmovq\t0(%rax),%r8\n\tmovq\t8(%rax),%r9\n\tmovq\t16(%rax),%r10\n\tmovq\t24(%rax),%r11\n\tmovq\t32(%rax),%rcx\n\tmovq\t40(%rax),%rdx\n\n\tmovq\t%r8,%rbx\n\torq\t%r9,%r8\n\torq\t%r10,%r8\n\torq\t%r11,%r8\n\torq\t%rcx,%r8\n\torq\t%rdx,%r8\n\n\txorq\t%rax,%rax\n\tmovq\t%rbx,%rbp\n\taddq\t%rbx,%rbx\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t%rcx,%rcx\n\tadcq\t%rdx,%rdx\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rsi),%rbx\n\tsbbq\t8(%rsi),%r9\n\tsbbq\t16(%rsi),%r10\n\tsbbq\t24(%rsi),%r11\n\tsbbq\t32(%rsi),%rcx\n\tsbbq\t40(%rsi),%rdx\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%rbx\n\n\tnotq\t%rax\n\n\ttestq\t%r8,%r8\n\tcmovzq\t%rdi,%rbp\n\n\ttestq\t%rbx,%rbx\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%rbp\n\tandq\t$2,%rax\n\torq\t%rbp,%rax\n\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_vec_select_32\n.private_extern\t_vec_select_32\n\n.p2align\t5\n_vec_select_32:\n.cfi_startproc\n\t.
byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t16(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t16(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t16(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-16(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-16(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-16(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-16(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_select_48\n.private_extern\t_vec_select_48\n\n.p2align\t5\n_vec_select_48:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t24(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t24(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t24(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-24(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-24(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-24(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-24(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-24(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-24(%rdi)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_select_96\n.private_extern\t_vec_select_96\n\n.p2align\t5\n_vec_select_96:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t48(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t48(%rdx),%r
dx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t48(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-48(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-48(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-48(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-48(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-48(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-48(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-48(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-48(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-48(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-48(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_select_192\n.private_extern\t_vec_select_192\n\n.p2align\t5\n_vec_select_192:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t96(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t96(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t96(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm
3\n\tmovdqu\t48+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-96(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-96(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-96(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-96(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-96(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-96(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-96(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_select_144\n.private_extern\t_vec_select_144\n\n.p2align\t5\n_vec_select_144:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t72(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t72(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t72(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\
tmovdqu\t16+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,16-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-72(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-72(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-72(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-72(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-72(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-72(%rdi)\n\tpand\t%xmm4,%xmm0\n\tpand\t%xmm5,%xmm1\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-72(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_select_288\n.private_extern\t_vec_select_288\n\n.p2align\t5\n_vec_select_288:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovd\t%ecx,%xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t$0,%xmm5,%xmm5\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rsi),%xmm0\n\tleaq\t144(%rsi),%rsi\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t(%rdx),%xmm1\n\tleaq\t144(%rdx),%rdx\n\tpcmpeqd\t%xmm5,%xmm4\n\tleaq\t144(%rdi),%rdi\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t0+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t0+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,0-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t16+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t16+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tm
ovdqu\t%xmm2,16-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t32+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t32+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,32-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t48+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t48+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,48-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t64+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t64+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,64-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t80+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t80+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,80-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t96+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t96+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,96-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t112+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t112+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,112-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t128+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t128+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,128-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t144+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t144+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,144-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t160+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t160+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,160-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t176+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t176+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,176-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t192+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t192+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,192-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t208+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t208+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,208-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t224+16-144(%rsi),%xmm2\n\
tpand\t%xmm5,%xmm1\n\tmovdqu\t224+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,224-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tmovdqu\t240+16-144(%rsi),%xmm0\n\tpand\t%xmm5,%xmm3\n\tmovdqu\t240+16-144(%rdx),%xmm1\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,240-144(%rdi)\n\tpand\t%xmm4,%xmm0\n\tmovdqu\t256+16-144(%rsi),%xmm2\n\tpand\t%xmm5,%xmm1\n\tmovdqu\t256+16-144(%rdx),%xmm3\n\tpor\t%xmm1,%xmm0\n\tmovdqu\t%xmm0,256-144(%rdi)\n\tpand\t%xmm4,%xmm2\n\tpand\t%xmm5,%xmm3\n\tpor\t%xmm3,%xmm2\n\tmovdqu\t%xmm2,272-144(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_prefetch\n.private_extern\t_vec_prefetch\n\n.p2align\t5\n_vec_prefetch:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tleaq\t-1(%rdi,%rsi,1),%rsi\n\tmovq\t$64,%rax\n\txorq\t%r8,%r8\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tcmovaq\t%r8,%rax\n\tprefetchnta\t(%rdi)\n\tleaq\t(%rdi,%rax,1),%rdi\n\tcmpq\t%rsi,%rdi\n\tcmovaq\t%rsi,%rdi\n\tprefetchnta\t(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_is_zero_16x\n.private_extern\t_vec_is_zero_16x\n\n.p2align\t5\n_vec_is_zero_16x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%esi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdi),%xmm0\n\tleaq\t16(%rdi),%rdi\n\nL$oop_is_zero:\n\tdecl\t%esi\n\tjz\tL$oop
_is_zero_done\n\tmovdqu\t(%rdi),%xmm1\n\tleaq\t16(%rdi),%rdi\n\tpor\t%xmm1,%xmm0\n\tjmp\tL$oop_is_zero\n\nL$oop_is_zero_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%esi\n\ttestq\t%rax,%rax\n\tcmovnzl\t%esi,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_vec_is_equal_16x\n.private_extern\t_vec_is_equal_16x\n\n.p2align\t5\n_vec_is_equal_16x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tshrl\t$4,%edx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t(%rdi),%xmm0\n\tmovdqu\t(%rsi),%xmm1\n\tsubq\t%rdi,%rsi\n\tleaq\t16(%rdi),%rdi\n\tpxor\t%xmm1,%xmm0\n\nL$oop_is_equal:\n\tdecl\t%edx\n\tjz\tL$oop_is_equal_done\n\tmovdqu\t(%rdi),%xmm1\n\tmovdqu\t(%rdi,%rsi,1),%xmm2\n\tleaq\t16(%rdi),%rdi\n\tpxor\t%xmm2,%xmm1\n\tpor\t%xmm1,%xmm0\n\tjmp\tL$oop_is_equal\n\nL$oop_is_equal_done:\n\tpshufd\t$0x4e,%xmm0,%xmm1\n\tpor\t%xmm1,%xmm0\n.byte\t102,72,15,126,192\n\tincl\t%edx\n\ttestq\t%rax,%rax\n\tcmovnzl\t%edx,%eax\n\txorl\t$1,%eax\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/add_mod_384x384-x86_64.s",
    "content": ".text\t\n\n.globl\t_add_mod_384x384\n.private_extern\t_add_mod_384x384\n\n.p2align\t5\n_add_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\taddq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tadcq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tadcq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tadcq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tadcq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tadcq\t48(%rdx),%r14\n\tmovq\t%r9,8(%rdi)\n\tadcq\t56(%rdx),%r15\n\tmovq\t%r10,16(%rdi)\n\tadcq\t64(%rdx),%rax\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r14,%r8\n\tadcq\t72(%rdx),%rbx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r15,%r9\n\tadcq\t80(%rdx),%rbp\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rax,%r10\n\tadcq\t88(%rdx),%rsi\n\tmovq\t%rbx,%r11\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rbp,%r12\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%rbx\n\tsbbq\t32(%rcx),%rbp\n\tmovq\t%rsi,%r13\n\tsbbq\t40(%rcx),%rsi\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r8,%r14\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%r11,%rbx\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%r12,%rbp\n\tmovq\t%rax,64(%rdi)\n\tcmovcq\t%r13,%rsi\n\tmovq\t%rbx,72(%rdi)\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.
cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sub_mod_384x384\n.private_extern\t_sub_mod_384x384\n\n.p2align\t5\n_sub_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tm
ovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n"
  },
  {
    "path": "build/mach-o/ct_inverse_mod_256-armv8.S",
    "content": ".text\n\n.globl\t_ct_inverse_mod_256\n.private_extern\t_ct_inverse_mod_256\n\n.align\t5\n_ct_inverse_mod_256:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-10*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1040\n\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx6, x7, [x1,#8*2]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#16+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #16+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstr\tx0, [sp]\t\t// offload out_ptr\n\n\tldp\tx8, x9, [x2,#8*0]\n\tldp\tx10, x11, [x2,#8*2]\n\n\tstp\tx4, x5, [x1,#8*0]\t// copy input to |a|\n\tstp\tx6, x7, [x1,#8*2]\n\tstp\tx8, x9, [x1,#8*4]\t// copy modulus to |b|\n\tstp\tx10, x11, [x1,#8*6]\n\n\t////////////////////////////////////////// first iteration\n\tbl\tLab_approximation_31_256_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12,[x0,#8*8]\t\t// initialize |u| with |f0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12, [x0,#8*10]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\tx8, [x1,#8*8]\t\t// 
|u|\n\tldr\tx9, [x1,#8*14]\t// |v|\n\tmadd\tx4, x16, x8, xzr\t// |u|*|f0|\n\tmadd\tx4, x17, x9, x4\t// |v|*|g0|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*4]\n\tstp\tx5, x5, [x0,#8*6]\n\n\tmadd\tx4, x12, x8, xzr\t// |u|*|f1|\n\tmadd\tx4, x13, x9, x4\t// |v|*|g1|\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx4, x5, [x0,#8*10]\n\tstp\tx5, x5, [x0,#8*12]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, 
x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, 
x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstr\tx24, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\t\t// sign extension\n\tstp\tx24, x24, [x0,#8*4]\n\tstp\tx24, x24, [x0,#8*6]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, 
x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\t\t\t// corrected |f0|\n\tmov\tx17, x13\t\t\t// corrected |g0|\n\n\tmov\tx12, x14\t\t\t// |f1|\n\tmov\tx13, x15\t\t\t// |g1|\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\t\t\t// corrected |f1|\n\tmov\tx17, x13\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\t////////////////////////////////////////// two[!] 
last iterations\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #47\t\t\t// 31 + 512 % 31\n\t//bl\t__ab_approximation_62_256\t// |a| and |b| are exact,\n\tldr\tx7, [x1,#8*0]\t\t// just load\n\tldr\tx11, [x1,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\tx16, x14\n\tmov\tx17, x15\n\tldr\tx0, [sp]\t\t\t// original out_ptr\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx20, x7, x17\t\t// figure out top-most limb\n\tldp\tx8, x9, [x3,#8*0]\n\tadc\tx23, x23, x25\n\tldp\tx10, x11, [x3,#8*2]\n\n\tadd\tx20, x20, x23\t\t// x20 is 1, 0 or -1\n\tasr\tx19, x20, #63\t\t// sign as mask\n\n\tand\tx23,   x8, x19\t\t// add mod<<256 conditionally\n\tand\tx24,   x9, x19\n\tadds\tx4, x4, x23\n\tand\tx25,   x10, x19\n\tadcs\tx5, x5, x24\n\tand\tx26,   x11, x19\n\tadcs\tx6, x6, x25\n\tadcs\tx7, x22,   x26\n\tadc\tx20, x20, xzr\t\t// x20 is 1, 0 or -1\n\n\tneg\tx19, x20\n\torr\tx20, x20, x19\t\t// excess bit or sign as mask\n\tasr\tx19, x19, #63\t\t// excess bit as mask\n\n\tand\tx8, x8, x20\t\t// mask |mod|\n\tand\tx9, x9, x20\n\tand\tx10, x10, x20\n\tand\tx11, x11, x20\n\n\teor\tx8, x8, x19\t\t// conditionally negate |mod|\n\teor\tx9, x9, x19\n\tadds\tx8, x8, x19, lsr#63\n\teor\tx10, x10, x19\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x19\n\tadcs\tx10, x10, xzr\n\tadc\tx11, x11, xzr\n\n\tadds\tx4, x4, x8\t// final adjustment for |mod|<<256\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*4]\n\tadc\tx7, x7, x11\n\tstp\tx6, x7, [x0,#8*6]\n\n\tadd\tsp, sp, #1040\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#10*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n////////////////////////////////////////////////////////////////////////\n\n.align\t5\n__smul_256x63:\n\tldp\tx4, x5, [x1,#8*0+64]\t// load |u| (or 
|v|)\n\tasr\tx14, x16, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx6, x7, [x1,#8*2+64]\n\teor\tx16, x16, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx22, [x1,#8*4+64]\n\n\teor\tx4, x4, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx16, x16, x14\n\teor\tx5, x5, x14\n\tadds\tx4, x4, x14, lsr#63\n\teor\tx6, x6, x14\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\teor\tx22, x22, x14\n\tumulh\tx19, x4, x16\n\tadcs\tx7, x7, xzr\n\tumulh\tx20, x5, x16\n\tadcs\tx22, x22, xzr\n\tumulh\tx21, x6, x16\n\tmul\tx4, x4, x16\n\tcmp\tx16, #0\n\tmul\tx5, x5, x16\n\tcsel\tx22, x22, xzr, ne\n\tmul\tx6, x6, x16\n\tadds\tx5, x5, x19\n\tmul\tx24, x7, x16\n\tadcs\tx6, x6, x20\n\tadcs\tx24, x24, x21\n\tadc\tx26, xzr, xzr\n\tldp\tx8, x9, [x1,#8*0+112]\t// load |u| (or |v|)\n\tasr\tx14, x17, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx10, x11, [x1,#8*2+112]\n\teor\tx17, x17, x14\t\t// conditionally negate |f_| (or |g_|)\n\tldr\tx23, [x1,#8*4+112]\n\n\teor\tx8, x8, x14\t// conditionally negate |u| (or |v|)\n\tsub\tx17, x17, x14\n\teor\tx9, x9, x14\n\tadds\tx8, x8, x14, lsr#63\n\teor\tx10, x10, x14\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x14\n\tadcs\tx10, x10, xzr\n\teor\tx23, x23, x14\n\tumulh\tx19, x8, x17\n\tadcs\tx11, x11, xzr\n\tumulh\tx20, x9, x17\n\tadcs\tx23, x23, xzr\n\tumulh\tx21, x10, x17\n\tadc\tx15, xzr, xzr\t\t// used in __smul_512x63_tail\n\tmul\tx8, x8, x17\n\tcmp\tx17, #0\n\tmul\tx9, x9, x17\n\tcsel\tx23, x23, xzr, ne\n\tmul\tx10, x10, x17\n\tadds\tx9, x9, x19\n\tmul\tx25, x11, x17\n\tadcs\tx10, x10, x20\n\tadcs\tx25, x25, x21\n\tadc\tx26, x26, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*0]\n\tadcs\tx24,   x24,   x25\n\tstp\tx6, x24, [x0,#8*2]\n\n\tret\n\n\n\n.align\t5\n__smul_512x63_tail:\n\tumulh\tx24, x7, x16\n\tldr\tx5, [x1,#8*19]\t// load rest of |v|\n\tadc\tx26, x26, xzr\n\tldp\tx6, x7, [x1,#8*20]\n\tand\tx22, x22, x16\n\n\tumulh\tx11, x11, x17\t// resume |v|*|g1| 
chain\n\n\tsub\tx24, x24, x22\t// tie up |u|*|f1| chain\n\tasr\tx25, x24, #63\n\n\teor\tx5, x5, x14\t// conditionally negate rest of |v|\n\teor\tx6, x6, x14\n\tadds\tx5, x5, x15\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\tumulh\tx19, x23,   x17\n\tadc\tx7, x7, xzr\n\tumulh\tx20, x5, x17\n\tadd\tx11, x11, x26\n\tumulh\tx21, x6, x17\n\n\tmul\tx4, x23,   x17\n\tmul\tx5, x5, x17\n\tadds\tx4, x4, x11\n\tmul\tx6, x6, x17\n\tadcs\tx5, x5, x19\n\tmul\tx22,   x7, x17\n\tadcs\tx6, x6, x20\n\tadcs\tx22,   x22,   x21\n\tadc\tx23, xzr, xzr\t\t// used in the final step\n\n\tadds\tx4, x4, x24\n\tadcs\tx5, x5, x25\n\tadcs\tx6, x6, x25\n\tstp\tx4, x5, [x0,#8*4]\n\tadcs\tx22,   x22,   x25\t// carry is used in the final step\n\tstp\tx6, x22,   [x0,#8*6]\n\n\tret\n\n\n\n.align\t5\n__smul_256_n_shift_by_31:\n\tldp\tx4, x5, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx24, x12, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx6, x7, [x1,#8*2+0]\n\teor\tx25, x12, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx4, x4, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx5, x5, x24\n\tadds\tx4, x4, x24, lsr#63\n\teor\tx6, x6, x24\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x24\n\tumulh\tx19, x4, x25\n\tadcs\tx6, x6, xzr\n\tumulh\tx20, x5, x25\n\tadc\tx7, x7, xzr\n\tumulh\tx21, x6, x25\n\tand\tx24, x24, x25\n\tumulh\tx22, x7, x25\n\tneg\tx24, x24\n\n\tmul\tx4, x4, x25\n\tmul\tx5, x5, x25\n\tmul\tx6, x6, x25\n\tadds\tx5, x5, x19\n\tmul\tx7, x7, x25\n\tadcs\tx6, x6, x20\n\tadcs\tx7, x7, x21\n\tadc\tx22, x22, x24\n\tldp\tx8, x9, [x1,#8*0+32]\t// load |a| (or |b|)\n\tasr\tx24, x13, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx10, x11, [x1,#8*2+32]\n\teor\tx25, x13, x24\t// conditionally negate |f0| (or |g0|)\n\n\teor\tx8, x8, x24\t// conditionally negate |a| (or |b|)\n\tsub\tx25, x25, x24\n\teor\tx9, x9, x24\n\tadds\tx8, x8, x24, lsr#63\n\teor\tx10, x10, x24\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x24\n\tumulh\tx19, x8, x25\n\tadcs\tx10, x10, xzr\n\tumulh\tx20, x9, 
x25\n\tadc\tx11, x11, xzr\n\tumulh\tx21, x10, x25\n\tand\tx24, x24, x25\n\tumulh\tx23, x11, x25\n\tneg\tx24, x24\n\n\tmul\tx8, x8, x25\n\tmul\tx9, x9, x25\n\tmul\tx10, x10, x25\n\tadds\tx9, x9, x19\n\tmul\tx11, x11, x25\n\tadcs\tx10, x10, x20\n\tadcs\tx11, x11, x21\n\tadc\tx23, x23, x24\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tadcs\tx7, x7, x11\n\tadc\tx8, x22,   x23\n\n\textr\tx4, x5, x4, #31\n\textr\tx5, x6, x5, #31\n\textr\tx6, x7, x6, #31\n\tasr\tx23, x8, #63\t// result's sign as mask\n\textr\tx7, x8, x7, #31\n\n\teor\tx4, x4, x23\t// ensure the result is positive\n\teor\tx5, x5, x23\n\tadds\tx4, x4, x23, lsr#63\n\teor\tx6, x6, x23\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x23\n\tadcs\tx6, x6, xzr\n\tstp\tx4, x5, [x0,#8*0]\n\tadc\tx7, x7, xzr\n\tstp\tx6, x7, [x0,#8*2]\n\n\teor\tx12, x12, x23\t\t// adjust |f/g| accordingly\n\teor\tx13, x13, x23\n\tsub\tx12, x12, x23\n\tsub\tx13, x13, x23\n\n\tret\n\n\n.align\t4\n__ab_approximation_31_256:\n\tldp\tx6, x7, [x1,#8*2]\n\tldp\tx10, x11, [x1,#8*6]\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx8, x9, [x1,#8*4]\n\nLab_approximation_31_256_loaded:\n\torr\tx19, x7, x11\t// check top-most limbs, ...\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x5, ne\n\torr\tx19, x7, x11\t// and ones before top-most, ...\n\tcsel\tx10, x10, x9, ne\n\n\tcmp\tx19, #0\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tcsel\tx6, x6, x4, ne\n\torr\tx19, x7, x11\t// and one more, ...\n\tcsel\tx10, x10, x8, ne\n\n\tclz\tx19, x19\n\tcmp\tx19, #64\n\tcsel\tx19, x19, xzr, ne\n\tcsel\tx7, x7, x6, ne\n\tcsel\tx11, x11, x10, ne\n\tneg\tx20, x19\n\n\tlslv\tx7, x7, x19\t// align high limbs to the left\n\tlslv\tx11, x11, x19\n\tlsrv\tx6, x6, x20\n\tlsrv\tx10, x10, x20\n\tand\tx6, x6, x20, asr#6\n\tand\tx10, x10, x20, asr#6\n\torr\tx7, x7, x6\n\torr\tx11, x11, x10\n\n\tbfxil\tx7, x4, #0, #31\n\tbfxil\tx11, x8, #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n\n\n\n.align\t4\n__inner_loop_31_256:\n\tmov\tx2, 
#31\n\tmov\tx13, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx15, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx23,#0x7FFFFFFF7FFFFFFF\n\nLoop_31_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x15\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx15, x15, x13,    hs\t// exchange |fg0| and |fg1|\n\tcsel\tx13, x13, x19,   hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x15, x22\n\tand\tx20, x23, x22\n\tsub\tx13, x13, x19\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx15, x15, x15\t// |f1|<<=1\n\tadd\tx13, x13, x20\n\tsub\tx15, x15, x23\n\tcbnz\tx2, Loop_31_256\n\n\tmov\tx23, #0x7FFFFFFF\n\tubfx\tx12, x13, #0, #32\n\tubfx\tx13, x13, #32, #32\n\tubfx\tx14, x15, #0, #32\n\tubfx\tx15, x15, #32, #32\n\tsub\tx12, x12, x23\t\t// remove bias\n\tsub\tx13, x13, x23\n\tsub\tx14, x14, x23\n\tsub\tx15, x15, x23\n\n\tret\n\n\n\n.align\t4\n__inner_loop_62_256:\n\tmov\tx12, #1\t\t// |f0|=1\n\tmov\tx13, #0\t\t// |g0|=0\n\tmov\tx14, #0\t\t// |f1|=0\n\tmov\tx15, #1\t\t// |g1|=1\n\nLoop_62_256:\n\tsbfx\tx22, x7, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\t// |b_|-|a_|\n\tsubs\tx21, x7, x19\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx19, x12\n\tcsel\tx11, x11, x7, hs\t// |b_| = |a_|\n\tcsel\tx7, x21, x20, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tmov\tx20, x13\n\tcsel\tx12, x12, x14,       hs\t// exchange |f0| and |f1|\n\tcsel\tx14, x14, x19,     hs\n\tcsel\tx13, x13, x15,       hs\t// exchange |g0| and |g1|\n\tcsel\tx15, x15, x20,     hs\n\tlsr\tx7, x7, #1\n\tand\tx19, x14, x22\n\tand\tx20, x15, x22\n\tadd\tx14, x14, x14\t\t// |f1|<<=1\n\tadd\tx15, x15, x15\t\t// |g1|<<=1\n\tsub\tx12, x12, x19\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was 
even)\n\tsub\tx13, x13, x20\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, Loop_62_256\n\n\tret\n\n"
  },
  {
    "path": "build/mach-o/ct_inverse_mod_256-x86_64.s",
    "content": ".text\t\n\n.globl\t_ct_inverse_mod_256\n.private_extern\t_ct_inverse_mod_256\n\n.p2align\t5\n_ct_inverse_mod_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1072,%rsp\n.cfi_adjust_cfa_offset\t1072\n\n\n\tleaq\t48+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\n\tmovq\t0(%rdx),%r12\n\tmovq\t8(%rdx),%r13\n\tmovq\t16(%rdx),%r14\n\tmovq\t24(%rdx),%r15\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rax,%rsi\n\n\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,64(%rdi)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmovq\t%rdx,72(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\n\tmovq\t64(%rsi),%r8\n\tmovq\t104(%rsi),%r12\n\tmovq\t%r8,%r9\n\timulq\t0(%rsp),%r8\n\tmovq\t%r12,%r13\n\timulq\t8(%rsp),%r12\n\taddq\t%r12,%r8\n\tmo
vq\t%r8,32(%rdi)\n\tsarq\t$63,%r8\n\tmovq\t%r8,40(%rdi)\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r8,56(%rdi)\n\tmovq\t%r8,64(%rdi)\n\tleaq\t64(%rsi),%rsi\n\n\timulq\t%rdx,%r9\n\timulq\t%rcx,%r13\n\taddq\t%r13,%r9\n\tmovq\t%r9,72(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tmovq\t%r9,104(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(
%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\txorq\t$256+64,%rsi\n\tm
ovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_256x63\n\tsarq\t$63,%rbp\n\tmovq\t%rbp,40(%rdi)\n\tmovq\t%rbp,48(%rdi)\n\tmovq\t%rbp,56(%rdi)\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24
(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\txorq\t$256+64,%rsi\n\tmovl\t$31,%edx\n\tcall\t__ab_approximation_31_256\n\n\n\tmovq\t%r12,16(%rsp)\n\tmovq\t%r13,24(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,0(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tle
aq\t32(%rdi),%rdi\n\tcall\t__smulq_256_n_shift_by_31\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t64(%rsi),%rsi\n\tleaq\t32(%rdi),%rdi\n\tcall\t__smulq_256x63\n\n\tmovq\t16(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tleaq\t40(%rdi),%rdi\n\tcall\t__smulq_512x63\n\n\txorq\t$256+64,%rsi\n\tmovl\t$47,%edx\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t32(%rsi),%r10\n\n\tcall\t__inner_loop_62_256\n\n\n\n\n\n\n\n\tleaq\t64(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_512x63\n\tadcq\t%rbp,%rdx\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rdx\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%rax\n\n\tmovq\t%rax,%rdx\n\tnegq\t%rax\n\torq\t%rax,%rdx\n\tsarq\t$63,%rax\n\n\tmovq\t%rdx,%r8\n\tmovq\t%rdx,%r9\n\tandq\t0(%rsi),%r8\n\tmovq\t%rdx,%r10\n\tandq\t8(%rsi),%r9\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rcx,%rcx\n\txorq\t%rax,%r9\n\tsubq\t%rax,%rcx\n\txorq\t%rax,%r10\n\txorq\t%rax,%rdx\n\taddq\t%rcx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%rdx\n\n\taddq\t%r8,%r12\n\tadcq\t%r9,%r13\n\tadcq\t%r10,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\tleaq\t1072(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1072-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__smulq_512x63:\
n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%r11,24(%rdi)\n\n\tmovq\t40(%rsi),%r8\n\tmovq\t48(%rsi),%r9\n\tmovq\t56(%rsi),%r10\n\tmovq\t64(%rsi),%r11\n\tmovq\t72(%rsi),%r12\n\tmovq\t80(%rsi),%r13\n\tmovq\t88(%rsi),%r14\n\tmovq\t96(%rsi),%r15\n\n\tmovq\t%rcx,%rdx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rcx\n\taddq\t%rax,%rcx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\n\tmulq\t%rcx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rcx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rcx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rcx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rcx\n\taddq\t%rax,%r14\n\tmovq\t%r1
5,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\timulq\t%rcx\n\taddq\t%rax,%r15\n\tadcq\t$0,%rdx\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t%rbx,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t%rbp,%r14\n\tadcq\t%rbp,%r15\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__smulq_256x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%rbp\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%rbp\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%rbp\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t%rcx,%rdx\n\tmovq\t40+0(%rsi),%r12\n\tmovq\t40+8(%rsi),%r13\n\tmovq\t40+16(%rsi),%r14\n\tmovq\t40+24(%rsi),%r15\n\tmovq\t40+32(%rsi),%rcx\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rcx\n\taddq\t%r12,%rax\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rcx\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tmovq\t%rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\t
movq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulq_256_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rbp\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\n\tmovq\t%rbp,%rbx\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbx,%rbp\n\tnegq\t%rbp\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tadcq\t%rdx,%rbp\n\tmovq\t32+0(%rsi),%r12\n\tmovq\t32+8(%rsi),%r13\n\tmovq\t32+16(%rsi),%r14\n\tmovq\t32+24(%rsi),%r15\n\n\tmovq\t%rcx,%rbx\n\tsarq\t$63,%rcx\n\txorq\t%rax,%rax\n\tsubq\t%rcx,%rax\n\n\txorq\t%rcx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rcx,%r12\n\txorq\t%rcx,%r13\n\txorq\t%rcx,%r14\n\txorq\t%rcx,%r15\n\taddq\t%r12,%rax\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\n\tmulq\t%rbx\n\tmovq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tandq\t%rbx,%rcx\n\tnegq\t%rcx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r14\
n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r15\n\tadcq\t%rdx,%rcx\n\taddq\t%r12,%r8\n\tadcq\t%r13,%r9\n\tadcq\t%r14,%r10\n\tadcq\t%r15,%r11\n\tadcq\t%rcx,%rbp\n\n\tmovq\t0(%rdi),%rdx\n\tmovq\t8(%rdi),%rcx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%rbp,%r11\n\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\txorq\t%rbp,%rdx\n\txorq\t%rbp,%rcx\n\taddq\t%rax,%rdx\n\taddq\t%rax,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__ab_approximation_31_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t24(%rsi),%r9\n\tmovq\t56(%rsi),%r11\n\tmovq\t16(%rsi),%rbx\n\tmovq\t48(%rsi),%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t40(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t32(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tnotq\t%rax\n\tandq\t%rax,%r9\n\tandq\t%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__inner_loop_31_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t
$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\nL$oop_31_256:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edx\n\tjnz\tL$oop_31_256\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__inner_loop_62_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t%edx,%r15d\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\nL$oop_62_256:\n\txorq\t%rax,%rax\n\ttestq\t%r14,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t%r14,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%r15d\n\tjnz\tL$oop_62_256\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/ct_inverse_mod_384-armv8.S",
    "content": ".text\n\n.globl\t_ct_inverse_mod_384\n.private_extern\t_ct_inverse_mod_384\n\n.align\t5\n_ct_inverse_mod_384:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1056\n\n\tldp\tx22,   x4, [x1,#8*0]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx7, x8, [x1,#8*4]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#32+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n#else\n\tadd\tx1, sp, #32+511\t// find closest 512-byte-aligned spot\n\tand\tx1, x1, #-512\t// in the frame...\n#endif\n\tstp\tx0, x3, [sp]\t\t// offload out_ptr, nx_ptr\n\n\tldp\tx9, x10, [x2,#8*0]\n\tldp\tx11, x12, [x2,#8*2]\n\tldp\tx13, x14, [x2,#8*4]\n\n\tstp\tx22,   x4, [x1,#8*0]\t// copy input to |a|\n\tstp\tx5, x6, [x1,#8*2]\n\tstp\tx7, x8, [x1,#8*4]\n\tstp\tx9, x10, [x1,#8*6]\t// copy modulus to |b|\n\tstp\tx11, x12, [x1,#8*8]\n\tstp\tx13, x14, [x1,#8*10]\n\n\t////////////////////////////////////////// first iteration\n\tmov\tx2, #62\n\tbl\tLab_approximation_62_loaded\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15,[x0,#8*12]\t\t// initialize |u| with |f0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15, [x0,#8*14]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// 
corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\tx7, [x1,#8*12]\t// |u|\n\tldr\tx8, [x1,#8*20]\t// |v|\n\tmul\tx3, x20, x7\t\t// |u|*|f0|\n\tsmulh\tx4, x20, x7\n\tmul\tx5, x21, x8\t\t// |v|*|g0|\n\tsmulh\tx6, x21, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*6]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*8]\n\tstp\tx5, x5, [x0,#8*10]\n\n\tmul\tx3, x15, x7\t\t// |u|*|f1|\n\tsmulh\tx4, x15, x7\n\tmul\tx5, x16, x8\t\t// |v|*|g1|\n\tsmulh\tx6, x16, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*14]\n\tasr\tx5, x4, #63\t\t// sign extension\n\tstp\tx5, x5, [x0,#8*16]\n\tstp\tx5, x5, [x0,#8*18]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// 
|g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstr\tx27, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\t\t// sign extension\n\tstp\tx27, x27, [x0,#8*6]\n\tstp\tx27, x27, [x0,#8*8]\n\tstp\tx27, x27, [x0,#8*10]\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected 
|g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\t\t\t// corrected |f0|\n\tmov\tx21, x16\t\t\t// corrected |g0|\n\n\tmov\tx15, x17\t\t\t// |f1|\n\tmov\tx16, x19\t\t\t// |g1|\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\t\t\t// corrected |f1|\n\tmov\tx21, x16\t\t\t// corrected |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\t////////////////////////////////////////// iteration before last\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldp\tx3, x8, [x1,#8*0]\t// just load\n\tldp\tx9, x14, [x1,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\tx0, x1, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n#endif\n\tstr\tx3, [x0,#8*0]\n\tstr\tx9, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f0|\n\tmov\tx21, x16\t\t\t// exact |g0|\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*12\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\n\tmov\tx20, x15\t\t\t// exact |f1|\n\tmov\tx21, x16\t\t\t// exact |g1|\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\t////////////////////////////////////////// last iteration\n\teor\tx1, x1, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n#endif\n\tmov\tx2, #24\t\t\t// 768 % 62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldr\tx3, [x1,#8*0]\t\t// just load\n\teor\tx8, x8, x8\n\tldr\tx9, [x1,#8*6]\n\teor\tx14, x14, x14\n\tbl\t__inner_loop_62\n\n\tmov\tx20, x17\n\tmov\tx21, x19\n\tldp\tx0, x15, [sp]\t\t\t// original out_ptr and n_ptr\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx23, x8, x21\t\t// figure out top-most limb\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x15,#8*0]\t// load |mod|\n\tadd\tx23, x23, x26\t\t// x23 is 1, 0 or -1\n\tldp\tx11, x12, [x15,#8*2]\n\tasr\tx22, x23, #63\t\t// sign as mask\n\tldp\tx13, x14, [x15,#8*4]\n\n\tand\tx26,   x9, x22\t\t// add mod<<384 conditionally\n\tand\tx27,   x10, x22\n\tadds\tx3, x3, x26\n\tand\tx28,   x11, x22\n\tadcs\tx4, x4, x27\n\tand\tx2,   x12, x22\n\tadcs\tx5, x5, x28\n\tand\tx26,   x13, x22\n\tadcs\tx6, x6, x2\n\tand\tx27,   x14, x22\n\tadcs\tx7, x7, x26\n\tadcs\tx8, x25,   x27\n\tadc\tx23, x23, xzr\t\t// x23 is 1, 0 or -1\n\n\tneg\tx22, x23\n\torr\tx23, x23, x22\t\t// excess bit or sign as mask\n\tasr\tx22, x22, #63\t\t// excess bit as mask\n\n\tand\tx9, x9, x23\t\t// mask |mod|\n\tand\tx10, x10, x23\n\tand\tx11, x11, x23\n\tand\tx12, x12, x23\n\tand\tx13, x13, x23\n\tand\tx14, x14, x23\n\n\teor\tx9,  x9, x22\t// conditionally negate |mod|\n\teor\tx10,  x10, 
x22\n\tadds\tx9,  x9, x22, lsr#63\n\teor\tx11,  x11, x22\n\tadcs\tx10,  x10, xzr\n\teor\tx12,  x12, x22\n\tadcs\tx11,  x11, xzr\n\teor\tx13, x13, x22\n\tadcs\tx12,  x12, xzr\n\teor\tx14, x14, x22\n\tadcs\tx13, x13, xzr\n\tadc\tx14, x14, xzr\n\n\tadds\tx3, x3, x9\t// final adjustment for |mod|<<384\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*8]\n\tadc\tx8, x8, x14\n\tstp\tx7, x8, [x0,#8*10]\n\n\tadd\tsp, sp, #1056\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n////////////////////////////////////////////////////////////////////////\n// see corresponding commentary in ctx_inverse_mod_384-x86_64...\n\n.align\t5\n__smul_384x63:\n\tldp\tx3, x4, [x1,#8*0+96]\t// load |u| (or |v|)\n\tasr\tx17, x20, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx5, x6, [x1,#8*2+96]\n\teor\tx20, x20, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx7, x8, [x1,#8*4+96]\n\n\teor\tx3, x3, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx25, [x1,#8*6+96]\n\tsub\tx20, x20, x17\n\teor\tx4, x4, x17\n\tadds\tx3, x3, x17, lsr#63\n\teor\tx5, x5, x17\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x17\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x3, x20\n\teor\tx8, x8, x17\n\tumulh\tx23, x4, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x5, x20\n\teor\tx25, x25, x17\n\tmul\tx3, x3, x20\n\tadcs\tx8, x8, xzr\n\tmul\tx4, x4, x20\n\tadcs\tx25, x25, xzr\n\tcmp\tx20, #0\n\tmul\tx5, x5, x20\n\tcsel\tx25, x25, xzr, ne\n\tadds\tx4, x4, x22\n\tumulh\tx22, x6, x20\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x7, x20\n\tmul\tx6, x6, x20\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x24\n\tmul\tx27,x8, x20\n\tadcs\tx7, x7, x22\n\tadcs\tx27,x27,x23\n\tadc\tx2, xzr, 
xzr\n\tldp\tx9, x10, [x1,#8*0+160]\t// load |u| (or |v|)\n\tasr\tx17, x21, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\tx11, x12, [x1,#8*2+160]\n\teor\tx21, x21, x17\t\t// conditionally negate |f_| (or |g_|)\n\tldp\tx13, x14, [x1,#8*4+160]\n\n\teor\tx9, x9, x17\t// conditionally negate |u| (or |v|)\n\tldr\tx26, [x1,#8*6+160]\n\tsub\tx21, x21, x17\n\teor\tx10, x10, x17\n\tadds\tx9, x9, x17, lsr#63\n\teor\tx11, x11, x17\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x17\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x17\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x9, x21\n\teor\tx14, x14, x17\n\tumulh\tx23, x10, x21\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x11, x21\n\teor\tx26, x26, x17\n\tmul\tx9, x9, x21\n\tadcs\tx14, x14, xzr\n\tmul\tx10, x10, x21\n\tadcs\tx26, x26, xzr\n\tadc\tx19, xzr, xzr\t\t// used in __smul_768x63_tail\n\tcmp\tx21, #0\n\tmul\tx11, x11, x21\n\tcsel\tx26, x26, xzr, ne\n\tadds\tx10, x10, x22\n\tumulh\tx22, x12, x21\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x13, x21\n\tmul\tx12, x12, x21\n\tmul\tx13, x13, x21\n\tadcs\tx12, x12, x24\n\tmul\tx28,x14, x21\n\tadcs\tx13, x13, x22\n\tadcs\tx28,x28,x23\n\tadc\tx2, x2, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*2]\n\tadcs\tx27,   x27,   x28\n\tstp\tx7, x27,   [x0,#8*4]\n\n\tret\n\n\n\n.align\t5\n__smul_768x63_tail:\n\tumulh\tx27, x8, x20\n\tldr\tx4, [x1,#8*27]// load rest of |v|\n\tadc\tx2, x2, xzr\n\tldp\tx5, x6, [x1,#8*28]\n\tand\tx25, x25, x20\n\tldp\tx7, x8, [x1,#8*30]\n\tsub\tx27, x27, x25\t// tie up |u|*|f1| chain\n\n\tumulh\tx14, x14, x21\t// resume |v|*|g1| chain\n\teor\tx4, x4, x17\t// conditionally negate rest of |v|\n\teor\tx5, x5, x17\n\teor\tx6, x6, x17\n\tadds\tx4, x4, x19\n\teor\tx7, x7, x17\n\tadcs\tx5, x5, xzr\n\teor\tx8, x8, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x26,   x21\n\tadcs\tx7, x7, xzr\n\tumulh\tx23, x4, x21\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x21\n\tadd\tx14, x14, 
x2\n\tumulh\tx25, x6, x21\n\tasr\tx28, x27, #63\n\tumulh\tx2, x7, x21\n\tmul\tx3, x26,   x21\n\tmul\tx4, x4, x21\n\tmul\tx5, x5, x21\n\tadds\tx3, x3, x14\n\tmul\tx6, x6, x21\n\tadcs\tx4, x4, x22\n\tmul\tx7, x7, x21\n\tadcs\tx5, x5, x23\n\tmul\tx22,   x8, x21\n\tadcs\tx6, x6, x24\n\tadcs\tx7, x7, x25\n\tadcs\tx25,   x22, x2\n\tadc\tx26, xzr, xzr\t\t// used in the final step\n\n\tadds\tx3, x3, x27\n\tadcs\tx4, x4, x28\n\tadcs\tx5, x5, x28\n\tadcs\tx6, x6, x28\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x28\n\tstp\tx5, x6, [x0,#8*8]\n\tadcs\tx25,   x25,   x28\t// carry is used in the final step\n\tstp\tx7, x25,   [x0,#8*10]\n\n\tret\n\n\n\n.align\t5\n__smul_384_n_shift_by_62:\n\tldp\tx3, x4, [x1,#8*0+0]\t// load |a| (or |b|)\n\tasr\tx28, x15, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx5, x6, [x1,#8*2+0]\n\teor\tx2, x15, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx7, x8, [x1,#8*4+0]\n\n\teor\tx3, x3, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tumulh\tx22, x3, x2\n\tadcs\tx6, x6, xzr\n\tumulh\tx23, x4, x2\n\teor\tx8, x8, x28\n\tmul\tx3, x3, x2\n\tadcs\tx7, x7, xzr\n\tmul\tx4, x4, x2\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x6, x2\n\tadds\tx4, x4, x22\n\tmul\tx5, x5, x2\n\tumulh\tx22, x7, x2\n\tneg\tx28, x28\n\tmul\tx6, x6, x2\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x8, x2\n\tmul\tx7, x7, x2\n\tadcs\tx6, x6, x24\n\tmul\tx8, x8, x2\n\tadcs\tx7, x7, x25\n\tadcs\tx8, x8, x22\n\tadc\tx27, x23, x28\n\tldp\tx9, x10, [x1,#8*0+48]\t// load |a| (or |b|)\n\tasr\tx28, x16, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\tx11, x12, [x1,#8*2+48]\n\teor\tx2, x16, x28\t// conditionally negate |f0| (or |g0|)\n\tldp\tx13, x14, [x1,#8*4+48]\n\n\teor\tx9, x9, x28\t// conditionally negate |a| (or |b|)\n\tsub\tx2, x2, x28\n\teor\tx10, x10, x28\n\tadds\tx9, x9, x28, 
lsr#63\n\teor\tx11, x11, x28\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x28\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x28\n\tumulh\tx22, x9, x2\n\tadcs\tx12, x12, xzr\n\tumulh\tx23, x10, x2\n\teor\tx14, x14, x28\n\tmul\tx9, x9, x2\n\tadcs\tx13, x13, xzr\n\tmul\tx10, x10, x2\n\tadc\tx14, x14, xzr\n\n\tumulh\tx24, x11, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x12, x2\n\tadds\tx10, x10, x22\n\tmul\tx11, x11, x2\n\tumulh\tx22, x13, x2\n\tneg\tx28, x28\n\tmul\tx12, x12, x2\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x14, x2\n\tmul\tx13, x13, x2\n\tadcs\tx12, x12, x24\n\tmul\tx14, x14, x2\n\tadcs\tx13, x13, x25\n\tadcs\tx14, x14, x22\n\tadc\tx28, x23, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x27,   x28\n\n\textr\tx3, x4, x3, #62\n\textr\tx4, x5, x4, #62\n\textr\tx5, x6, x5, #62\n\tasr\tx28, x9, #63\n\textr\tx6, x7, x6, #62\n\textr\tx7, x8, x7, #62\n\textr\tx8, x9, x8, #62\n\n\teor\tx3, x3, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x28\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x0,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x0,#8*4]\n\n\teor\tx15, x15, x28\n\teor\tx16, x16, x28\n\tsub\tx15, x15, x28\n\tsub\tx16, x16, x28\n\n\tret\n\n\n.align\t4\n__ab_approximation_62:\n\tldp\tx7, x8, [x1,#8*4]\n\tldp\tx13, x14, [x1,#8*10]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx11, x12, [x1,#8*8]\n\nLab_approximation_62_loaded:\n\torr\tx22, x8, x14\t// check top-most limbs, ...\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx22, x8, x14\t// ... ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tldp\tx3, x4, [x1,#8*0]\n\tldp\tx9, x10, [x1,#8*6]\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx22, x8, x14\t// ... 
and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx22, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx22, x8, x14\n\tcsel\tx13, x13, x10, ne\n\n\tclz\tx22, x22\n\tcmp\tx22, #64\n\tcsel\tx22, x22, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx23, x22\n\n\tlslv\tx8, x8, x22\t// align high limbs to the left\n\tlslv\tx14, x14, x22\n\tlsrv\tx7, x7, x23\n\tlsrv\tx13, x13, x23\n\tand\tx7, x7, x23, asr#6\n\tand\tx13, x13, x23, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tb\t__inner_loop_62\n\tret\n\n\n.align\t4\n__inner_loop_62:\n\tmov\tx15, #1\t\t// |f0|=1\n\tmov\tx16, #0\t\t// |g0|=0\n\tmov\tx17, #0\t\t// |f1|=0\n\tmov\tx19, #1\t\t// |g1|=1\n\nLoop_62:\n\tsbfx\tx28, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\tx2, x2, #1\n\tsubs\tx24, x9, x3\t// |b_|-|a_|\n\tand\tx22, x9, x28\n\tsbc\tx25, x14, x8\n\tand\tx23, x14, x28\n\tsubs\tx26, x3, x22\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\tx22, x15\n\tsbcs\tx27, x8, x23\n\tmov\tx23, x16\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx14, x14, x8, hs\n\tcsel\tx3, x26, x24, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx8, x27, x25, hs\n\tcsel\tx15, x15, x17,       hs\t// exchange |f0| and |f1|\n\tcsel\tx17, x17, x22,     hs\n\tcsel\tx16, x16, x19,       hs\t// exchange |g0| and |g1|\n\tcsel\tx19, x19, x23,     hs\n\textr\tx3, x8, x3, #1\n\tlsr\tx8, x8, #1\n\tand\tx22, x17, x28\n\tand\tx23, x19, x28\n\tadd\tx17, x17, x17\t\t// |f1|<<=1\n\tadd\tx19, x19, x19\t\t// |g1|<<=1\n\tsub\tx15, x15, x22\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\tx16, x16, x23\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\tx2, Loop_62\n\n\tret\n\n"
  },
  {
    "path": "build/mach-o/ct_is_square_mod_384-armv8.S",
    "content": ".text\n\n.globl\t_ct_is_square_mod_384\n.private_extern\t_ct_is_square_mod_384\n\n.align\t5\n_ct_is_square_mod_384:\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #512\n\n\tldp\tx3, x4, [x0,#8*0]\t\t// load input\n\tldp\tx5, x6, [x0,#8*2]\n\tldp\tx7, x8, [x0,#8*4]\n\n\tadd\tx0, sp, #255\t// find closest 256-byte-aligned spot\n\tand\tx0, x0, #-256\t// in the frame...\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\n\tldp\tx9, x10, [x1,#8*0]\t\t// load modulus\n\tldp\tx11, x12, [x1,#8*2]\n\tldp\tx13, x14, [x1,#8*4]\n\n\tstp\tx3, x4, [x0,#8*6]\t// copy input to |a|\n\tstp\tx5, x6, [x0,#8*8]\n\tstp\tx7, x8, [x0,#8*10]\n\tstp\tx9, x10, [x0,#8*0]\t// copy modulus to |b|\n\tstp\tx11, x12, [x0,#8*2]\n\tstp\tx13, x14, [x0,#8*4]\n\n\teor\tx2, x2, x2\t\t\t// init the Legendre symbol\n\tmov\tx15, #24\t\t\t// 24 is 768/30-1\n\tb\tLoop_is_square\n\n.align\t4\nLoop_is_square:\n\tbl\t__ab_approximation_30\n\tsub\tx15, x15, #1\n\n\teor\tx1, x0, #128\t\t// pointer to dst |b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,csp,x1\n#endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\tx19, x16\t\t\t// |f0|\n\tmov\tx20, x17\t\t\t// |g0|\n\tadd\tx1,x1,#8*6\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\tx9, x10, [x1,#-8*6]\n\teor\tx0, x0, #128\t\t// flip-flop src |a|b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n#endif\n\tand\tx27, x27, x9\t\t// if |a| was negative,\n\tadd\tx2, x2, x27, lsr#1\t\t// adjust |L|\n\n\tcbnz\tx15, Loop_is_square\n\n\t////////////////////////////////////////// last iteration\n\t//bl\t__ab_approximation_30\t\t// |a| and |b| are exact,\n\t//ldr\tx8, [x0,#8*6]\t\t// and loaded\n\t//ldr\tx14, [x0,#8*0]\n\tmov\tx15, #48\t\t\t// 48 is 768%30 + 
30\n\tbl\t__inner_loop_48\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tand\tx0, x2, #1\n\teor\tx0, x0, #1\n\n\tadd\tsp, sp, #512\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__smul_384_n_shift_by_30:\n\tldp\tx3, x4, [x0,#8*0+0]\t// load |b| (or |a|)\n\tasr\tx27, x20, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx5, x6, [x0,#8*2+0]\n\teor\tx20, x20, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx7, x8, [x0,#8*4+0]\n\n\teor\tx3, x3, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx20, x20, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tumulh\tx21, x3, x20\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x4, x20\n\teor\tx8, x8, x27\n\tumulh\tx23, x5, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x6, x20\n\tadc\tx8, x8, xzr\n\n\tumulh\tx25, x7, x20\n\tand\tx28, x20, x27\n\tumulh\tx26, x8, x20\n\tneg\tx28, x28\n\tmul\tx3, x3, x20\n\tmul\tx4, x4, x20\n\tmul\tx5, x5, x20\n\tadds\tx4, x4, x21\n\tmul\tx6, x6, x20\n\tadcs\tx5, x5, x22\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x23\n\tmul\tx8, x8, x20\n\tadcs\tx7, x7, x24\n\tadcs\tx8, x8 ,x25\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x0,#8*0+48]\t// load |b| (or |a|)\n\tasr\tx27, x19, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\tx11, x12, [x0,#8*2+48]\n\teor\tx19, x19, x27\t\t// conditionally negate |g1| (or |f1|)\n\tldp\tx13, x14, [x0,#8*4+48]\n\n\teor\tx9, x9, x27\t// conditionally negate |b| (or |a|)\n\tsub\tx19, x19, x27\n\teor\tx10, x10, x27\n\tadds\tx9, x9, x27, lsr#63\n\teor\tx11, x11, x27\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x27\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x27\n\tumulh\tx21, x9, x19\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x10, 
x19\n\teor\tx14, x14, x27\n\tumulh\tx23, x11, x19\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x12, x19\n\tadc\tx14, x14, xzr\n\n\tumulh\tx25, x13, x19\n\tand\tx28, x19, x27\n\tumulh\tx27, x14, x19\n\tneg\tx28, x28\n\tmul\tx9, x9, x19\n\tmul\tx10, x10, x19\n\tmul\tx11, x11, x19\n\tadds\tx10, x10, x21\n\tmul\tx12, x12, x19\n\tadcs\tx11, x11, x22\n\tmul\tx13, x13, x19\n\tadcs\tx12, x12, x23\n\tmul\tx14, x14, x19\n\tadcs\tx13, x13, x24\n\tadcs\tx14, x14 ,x25\n\tadc\tx27, x27, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x26,   x27\n\n\textr\tx3, x4, x3, #30\n\textr\tx4, x5, x4, #30\n\textr\tx5, x6, x5, #30\n\tasr\tx27, x9, #63\n\textr\tx6, x7, x6, #30\n\textr\tx7, x8, x7, #30\n\textr\tx8, x9, x8, #30\n\n\teor\tx3, x3, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x27\n\tstp\tx3, x4, [x1,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x1,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x1,#8*4]\n\n\tret\n\n\n.align\t4\n__ab_approximation_30:\n\tldp\tx13, x14, [x0,#8*4]\t// |a| is still in registers\n\tldp\tx11, x12, [x0,#8*2]\n\n\torr\tx21, x8, x14\t// check top-most limbs, ...\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x6, ne\n\torr\tx21, x8, x14\t// ... ones before top-most, ...\n\tcsel\tx13, x13, x12, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x5, ne\n\torr\tx21, x8, x14\t// ... 
and ones before that ...\n\tcsel\tx13, x13, x11, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x4, ne\n\torr\tx21, x8, x14\t// and one more, ...\n\tcsel\tx13, x13, x10, ne\n\n\tcmp\tx21, #0\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tcsel\tx7, x7, x3, ne\n\torr\tx21, x8, x14\n\tcsel\tx13, x13, x9, ne\n\n\tclz\tx21, x21\n\tcmp\tx21, #64\n\tcsel\tx21, x21, xzr, ne\n\tcsel\tx8, x8, x7, ne\n\tcsel\tx14, x14, x13, ne\n\tneg\tx22, x21\n\n\tlslv\tx8, x8, x21\t// align high limbs to the left\n\tlslv\tx14, x14, x21\n\tlsrv\tx7, x7, x22\n\tlsrv\tx13, x13, x22\n\tand\tx7, x7, x22, asr#6\n\tand\tx13, x13, x22, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tbfxil\tx8, x3, #0, #32\n\tbfxil\tx14, x9, #0, #32\n\n\tb\t__inner_loop_30\n\tret\n\n\n\n.align\t4\n__inner_loop_30:\n\tmov\tx28, #30\n\tmov\tx17, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\tx20, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\tx27,#0x7FFFFFFF7FFFFFFF\n\nLoop_30:\n\tsbfx\tx24, x8, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x8, x14\n\tsub\tx28, x28, #1\n\tand\tx21, x14, x24\n\n\tsub\tx22, x14, x8\t\t// |b_|-|a_|\n\tsubs\tx23, x8, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\t// L + (a_ & b_) >> 1\n\tmov\tx21, x20\n\tcsel\tx14, x14, x8, hs\t// |b_| = |a_|\n\tcsel\tx8, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx20, x20, x17,  hs\t// exchange |fg0| and |fg1|\n\tcsel\tx17, x17, x21, hs\n\tcsel\tx2,   x2,   x25, hs\n\tlsr\tx8, x8, #1\n\tand\tx21, x20, x24\n\tand\tx22, x27, x24\n\tadd\tx23, x14, #2\n\tsub\tx17, x17, x21\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\tx20, x20, x20\t// |f1|<<=1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\tadd\tx17, x17, x22\n\tsub\tx20, x20, x27\n\n\tcbnz\tx28, Loop_30\n\n\tmov\tx27, #0x7FFFFFFF\n\tubfx\tx16, x17, #0, #32\n\tubfx\tx17, x17, #32, #32\n\tubfx\tx19, x20, #0, #32\n\tubfx\tx20, x20, #32, #32\n\tsub\tx16, 
x16, x27\t\t// remove the bias\n\tsub\tx17, x17, x27\n\tsub\tx19, x19, x27\n\tsub\tx20, x20, x27\n\n\tret\n\n\n.align\t4\n__inner_loop_48:\nLoop_48:\n\tsbfx\tx24, x3, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tand\tx25, x3, x9\n\tsub\tx15, x15, #1\n\tand\tx21, x9, x24\n\tsub\tx22, x9, x3\t\t// |b_|-|a_|\n\tsubs\tx23, x3, x21\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tadd\tx25, x2, x25, lsr#1\n\tcsel\tx9, x9, x3, hs\t// |b_| = |a_|\n\tcsel\tx3, x23, x22, hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\tx2,   x2,   x25, hs\n\tadd\tx23, x9, #2\n\tlsr\tx3, x3, #1\n\tadd\tx2, x2, x23, lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\n\tcbnz\tx15, Loop_48\n\n\tret\n\n"
  },
  {
    "path": "build/mach-o/ct_is_square_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\t_ct_is_square_mod_384\n.private_extern\t_ct_is_square_mod_384\n\n.p2align\t5\n_ct_is_square_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$536,%rsp\n.cfi_adjust_cfa_offset\t536\n\n\n\tleaq\t24+255(%rsp),%rax\n\tandq\t$-256,%rax\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbx\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rdx\n\tmovq\t40(%rsi),%rdi\n\tmovq\t%rax,%rsi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rcx,72(%rax)\n\tmovq\t%rdx,80(%rax)\n\tmovq\t%rdi,88(%rax)\n\n\txorq\t%rbp,%rbp\n\tmovl\t$24,%ecx\n\tjmp\tL$oop_is_square\n\n.p2align\t5\nL$oop_is_square:\n\tmovl\t%ecx,16(%rsp)\n\n\tcall\t__ab_approximation_30\n\tmovq\t%rax,0(%rsp)\n\tmovq\t%rbx,8(%rsp)\n\n\tmovq\t$128+48,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovq\t0(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmovl\t16(%rsp),%ecx\n\txorq\t$128,%rsi\n\n\tandq\t48(%rdi),%r14\n\tshrq\t$1,%r14\n\taddq\t%r14,%rbp\n\n\tsubl\t$1,%ecx\n\tjnz\tL$oop_is_square\n\n\n\n\n\tmovq\t48(%rsi),%r9\n\tcall\t__inner_loop_48\n\n\tmovq\t$1,%rax\n\tandq\t%rbp,%rax\n\txorq\t$1,%rax\n\n\tleaq\t536(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tm
ovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-536-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__smulq_384_n_shift_by_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,%r14\n\tandq\t%rbx,%r14\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r14\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbx\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbx\n\taddq\t%rax,%rbx\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx
,%r12\n\txorq\t%rdx,%r13\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%rdx,%r15\n\tandq\t%rbx,%r15\n\tmulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rbx\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbx\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tnegq\t%r15\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\tleaq\t-48(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$30,%r9,%r8\n\tshrdq\t$30,%r10,%r9\n\tshrdq\t$30,%r11,%r10\n\tshrdq\t$30,%r12,%r11\n\tshrdq\t$30,%r13,%r12\n\tshrdq\t$30,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbx,%rbx\n\tsubq\t%r14,%rbx\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbx,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__ab_approximation_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t88(%rsi),%rbx\n\tmovq\t80(%rsi),%r15\n\tmovq\t72(%rsi),%r14\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r11,%r12\n\tmovq\t64(%rsi),%r11\n\tcmovzq\t%r14,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r10,%r12\n\tmovq\t56(%rsi),%r10\n\tcmovzq\t%r11,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,
%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r9,%r12\n\tmovq\t48(%rsi),%r9\n\tcmovzq\t%r10,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tcmovzq\t%r12,%r13\n\tcmovzq\t%r15,%rbx\n\tcmovzq\t%r8,%r12\n\tcmovzq\t%r9,%r15\n\n\tmovq\t%r13,%rax\n\torq\t%rbx,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r13\n\tcmovzq\t%r9,%rbx\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%r12,%r13\n\tshldq\t%cl,%r15,%rbx\n\n\tmovq\t$0xFFFFFFFF00000000,%rax\n\tmovl\t%r8d,%r8d\n\tmovl\t%r9d,%r9d\n\tandq\t%rax,%r13\n\tandq\t%rax,%rbx\n\torq\t%r13,%r8\n\torq\t%rbx,%r9\n\n\tjmp\t__inner_loop_30\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__inner_loop_30:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rbx\n\tmovq\t$0x800000007FFFFFFF,%rcx\n\tleaq\t-1(%rbx),%r15\n\tmovl\t$30,%edi\n\nL$oop_30:\n\tmovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbx,%r12\n\tmovq\t%rcx,%r13\n\tmovq\t%rbp,%r14\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rcx,%rbx\n\tcmovbq\t%r12,%rcx\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\tsubq\t%rcx,%rbx\n\taddq\t%r15,%rbx\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbx\n\tcmovzq\t%r13,%rcx\n\tcmovzq\t%r14,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rcx,%rcx\n\tleaq\t(%rax,%rbp,1),%rbp\n\tsubq\t%r15,%rcx\n\n\tsubl\t$1,%edi\n\tjnz\tL$oop_30\n\n\tshrq\t$32,%r15\n\tmovl\t%ebx,%eax\n\tshrq\t$32,%rbx\n\tmovl\t%ecx,%edx\n\tshrq\t$32,%rcx\n\tsubq\t%r15,%rax\n\tsubq\t%r15,%rbx\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__inner_loop_48:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovl\t$48,%edi\n\nL$oop_48:\n\tm
ovq\t%r8,%rax\n\tandq\t%r9,%rax\n\tshrq\t$1,%rax\n\n\tcmpq\t%r9,%r8\n\tmovq\t%r8,%r10\n\tmovq\t%r9,%r11\n\tleaq\t(%rax,%rbp,1),%rax\n\tmovq\t%rbp,%r12\n\tcmovbq\t%r9,%r8\n\tcmovbq\t%r10,%r9\n\tcmovbq\t%rax,%rbp\n\n\tsubq\t%r9,%r8\n\n\ttestq\t$1,%r10\n\tcmovzq\t%r10,%r8\n\tcmovzq\t%r11,%r9\n\tcmovzq\t%r12,%rbp\n\n\tleaq\t2(%r9),%rax\n\tshrq\t$1,%r8\n\tshrq\t$2,%rax\n\taddq\t%rax,%rbp\n\n\tsubl\t$1,%edi\n\tjnz\tL$oop_48\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/ctq_inverse_mod_384-x86_64.s",
    "content": ".comm\t___blst_platform_cap,4\n.text\t\n\n.globl\t_ct_inverse_mod_384\n.private_extern\t_ct_inverse_mod_384\n\n.p2align\t5\n_ct_inverse_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$ct_inverse_mod_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1112,%rsp\n.cfi_adjust_cfa_offset\t1112\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%r
sp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\n\tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(
%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txor
q\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\tcall\t__ab_approximation_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulq_384_n_s
hift_by_62\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384_n_shift_by_62\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$62,%edi\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t48(%rsi),%r10\n\tmovq\t56(%rsi),%r11\n\tcall\t__inner_loop_62\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r10,48(%rdi)\n\n\n\n\tleaq\t96(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__smulq_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulq_768x63\n\n\n\txorq\t$256+96,%rsi\n\tmovl\t$24,%edi\n\n\tmovq\t0(%rsi),%r8\n\txorq\t%r9,%r9\n\tmovq\t48(%rsi),%r10\n\txorq\t%r11,%r11\n\tcall\t__inner_loop_62\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulq_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r
11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1112-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__smulq_768x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmovq\t%r9,8(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmovq\t%r10,16(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%r11,24(%rdi)\n\tmulq\t%rbp\n\taddq
\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmovq\t%r12,32(%rdi)\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tmovq\t%rdx,%rsi\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rsi\n\taddq\t%rax,%rsi\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\txorq\t%rdx,%r15\n\txorq\t%rdx,%rbx\n\txorq\t%rdx,%rbp\n\txorq\t%rdx,%rcx\n\txorq\t%rdx,%rdi\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t$0,%rdi\n\n\tmulq\t%rsi\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tmovq\t%rdx,%r9\n\tmulq\t%rsi\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rsi\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rsi\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rsi\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\tmulq\t%rsi\n\taddq\t%rax,%rbx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\tmulq\t%rsi\n\taddq\t%rax,%rbp\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%r
dx\n\tmovq\t%rdx,%rdi\n\timulq\t%rsi\n\tmovq\t8(%rsp),%rsi\n\taddq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rsi),%r8\n\tadcq\t8(%rsi),%r9\n\tadcq\t16(%rsi),%r10\n\tadcq\t24(%rsi),%r11\n\tadcq\t32(%rsi),%r12\n\tadcq\t40(%rsi),%r13\n\tadcq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%rdi\n\tadcq\t%rdi,%r15\n\tadcq\t%rdi,%rbx\n\tadcq\t%rdi,%rbp\n\tadcq\t%rdi,%rcx\n\tadcq\t%rdi,%rax\n\tadcq\t%rdi,%rdx\n\n\tleaq\t(%rsi),%rdi\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulq_384x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t56(%rsi),%rsi\n\tmovq\t%rcx,%rdx
\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,%r15\n\tmovq\t%r14,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\txorq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t-56(%rsi),%rsi\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulq_384_n_shift_by_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\ta
ddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r14\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r14\n\tnegq\t%r14\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r14\n\n\tleaq\t48(%rsi),%rsi\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rdx\n\txorq\t%rax,%rax\n\tsubq\t%rdx,%rax\n\n\txorq\t%rdx,%rbp\n\taddq\t%rax,%rbp\n\n\txorq\t%rdx,%r8\n\txorq\t%rdx,%r9\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\tmovq\t%rdx,%r15\n\taddq\t%r8,%rax\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmulq\t%rbp\n\tmovq\t%rax,%r8\n\tmovq\t%r9,%rax\n\tandq\t%rbp,%r15\n\tnegq\t%r15\n\tmovq\t%rdx,%r9\n\tmulq\t%rbp\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\tmulq\t%rbp\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\tmulq\t%rbp\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tadcq\t%rdx,%r15\n\n\tleaq\t-48(%rsi),%rsi\n\tmovq\t%rbx,%rdx\n\n\taddq\t0(%rdi),%r8\n\ta
dcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\n\tshrdq\t$62,%r9,%r8\n\tshrdq\t$62,%r10,%r9\n\tshrdq\t$62,%r11,%r10\n\tshrdq\t$62,%r12,%r11\n\tshrdq\t$62,%r13,%r12\n\tshrdq\t$62,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__ab_approximation_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t16(%rsi),%r8\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t8(%rsi),%r8\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\tmovq\t0(%rsi),%r8\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tjmp\t__inner_loop_62\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t3\n.long\t0\n_
_inner_loop_62:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\tmovq\t%rsi,8(%rsp)\n\nL$oop_62:\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbp\n\tmovq\t%r11,%r14\n\tcmovnzq\t%r10,%rax\n\tcmovnzq\t%r11,%rbx\n\tsubq\t%r8,%rbp\n\tsbbq\t%r9,%r14\n\tmovq\t%r8,%r15\n\tmovq\t%r9,%rsi\n\tsubq\t%rax,%r8\n\tsbbq\t%rbx,%r9\n\tcmovcq\t%rbp,%r8\n\tcmovcq\t%r14,%r9\n\tcmovcq\t%r15,%r10\n\tcmovcq\t%rsi,%r11\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrdq\t$1,%r9,%r8\n\tshrq\t$1,%r9\n\ttestq\t$1,%r15\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\tL$oop_62\n\n\tmovq\t8(%rsp),%rsi\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rax\n\tlfence\n\tjmpq\t*%rax\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/ctx_inverse_mod_384-x86_64.s",
    "content": ".text\t\n\n.globl\t_ctx_inverse_mod_384\n.private_extern\t_ctx_inverse_mod_384\n\n.p2align\t5\n_ctx_inverse_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$ct_inverse_mod_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$1112,%rsp\n.cfi_adjust_cfa_offset\t1112\n\n\n\tleaq\t88+511(%rsp),%rax\n\tandq\t$-512,%rax\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rcx,40(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t0(%rdx),%r14\n\tmovq\t8(%rdx),%r15\n\tmovq\t16(%rdx),%rbx\n\tmovq\t24(%rdx),%rbp\n\tmovq\t32(%rdx),%rsi\n\tmovq\t40(%rdx),%rdi\n\n\tmovq\t%r8,0(%rax)\n\tmovq\t%r9,8(%rax)\n\tmovq\t%r10,16(%rax)\n\tmovq\t%r11,24(%rax)\n\tmovq\t%r12,32(%rax)\n\tmovq\t%r13,40(%rax)\n\n\tmovq\t%r14,48(%rax)\n\tmovq\t%r15,56(%rax)\n\tmovq\t%rbx,64(%rax)\n\tmovq\t%rbp,72(%rax)\n\tmovq\t%rsi,80(%rax)\n\tmovq\t%rax,%rsi\n\tmovq\t%rdi,88(%rax)\n\n\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,96(%rdi)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmovq\t%rdx,104(%rdi)\n\n\n\txorq\t$256,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq
\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\n\tmovq\t96(%rsi),%rax\n\tmovq\t152(%rsi),%r11\n\tmovq\t%rdx,%rbx\n\tmovq\t%rax,%r10\n\timulq\t56(%rsp)\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t64(%rsp)\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r9,80(%rdi)\n\tmovq\t%r9,88(%rdi)\n\tmovq\t%r9,96(%rdi)\n\tleaq\t96(%rsi),%rsi\n\n\tmovq\t%r10,%rax\n\timulq\t%rbx\n\tmovq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tmovq\t%rdx,%r9\n\timulq\t%rcx\n\taddq\t%rax,%r8\n\tadcq\t%rdx,%r9\n\tmovq\t%r8,104(%rdi)\n\tmovq\t%r9,112(%rdi)\n\tsarq\t$63,%r9\n\tmovq\t%r9,120(%rdi)\n\tmovq\t%r9,128(%rdi)\n\tmovq\t%r9,136(%rdi)\n\tmovq\t%r9,144(%rdi)\n\tmovq\t%r9,152(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n
\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_38
4_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%r
dx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_384x63\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%r14,64(%rdi)\n\tmovq\t%r14,72(%rdi)\n\tmovq\t%r14,80(%rdi)\n\tmovq\t%r14,88(%rdi)\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tm
ovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_76
8x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__sm
ulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\txorq\t$256+96,%rsi\n\tmovl\t$31,%edi\n\tcall\t__ab_approximation_31\n\n\n\tmovq\t%r12,72(%rsp)\n\tmovq\t%r13,80(%rsp)\n\n\tmovq\t$256,%rdi\n\txorq\t%rsi,%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,56(%rsp)\n\tmovq\t%rcx,64(%rsp)\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_191_n_shift_by_31\n\tmovq\t%rdx,72(%rsp)\n\tmovq\t%rcx,80(%rsp)\n\n\tmovq\t56(%rsp),%rdx\n\tmovq\t64(%rsp),%rcx\n\tleaq\t96(%rsi),%rsi\n\tleaq\t48(%rdi),%rdi\n\tcall\t__smulx_384x63\n\n\tmovq\t72(%rsp),%rdx\n\tmovq\t80(%rsp),%rcx\n\tleaq\t56(%rdi),%rdi\n\tcall\t__smulx_768x63\n\n\txorq\t$256+96,%rsi\n\tmovl\t$55,%edi\n\n\tmovq\t0(%rsi),%r8\n\n\tmovq\t48(%rsi),%r10\n\n\tcall\t__tail_loop_55\n\n\n\n\n\n\n\n\tleaq\t96(%rsi),%rsi\n\n\n\n\n\n\tmovq\t%r12,%rdx\n\tmovq\t%r13,%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__smulx_768x63\n\n\tmovq\t40(%rsp),%rsi\n\tmovq\t%rdx,%r13\n\tsarq\t$63,%r13\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmo
vq\t%r13,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\tadcq\t$0,%rdx\n\n\tmovq\t%rdx,%r13\n\tnegq\t%rdx\n\torq\t%rdx,%r13\n\tsarq\t$63,%rdx\n\n\tmovq\t%r13,%r8\n\tmovq\t%r13,%r9\n\tmovq\t%r13,%r10\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\tmovq\t%r13,%r11\n\tandq\t16(%rsi),%r10\n\tandq\t24(%rsi),%r11\n\tmovq\t%r13,%r12\n\tandq\t32(%rsi),%r12\n\tandq\t40(%rsi),%r13\n\n\txorq\t%rdx,%r8\n\txorq\t%rsi,%rsi\n\txorq\t%rdx,%r9\n\tsubq\t%rdx,%rsi\n\txorq\t%rdx,%r10\n\txorq\t%rdx,%r11\n\txorq\t%rdx,%r12\n\txorq\t%rdx,%r13\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rbx\n\tadcq\t%r11,%rbp\n\tadcq\t%r12,%rcx\n\tadcq\t%r13,%rax\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\tleaq\t1112(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-1112-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__smulx_768x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%r
sp)\n\tleaq\t56(%rsi),%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tsarq\t$63,%r14\n\tmovq\t%r14,56(%rdi)\n\tmovq\t%rcx,%rdx\n\tmovq\t%rcx,%rax\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\tmovq\t56(%rsi),%r15\n\tmovq\t64(%rsi),%rbx\n\tmovq\t72(%rsi),%rbp\n\tmovq\t80(%rsi),%rcx\n\tmovq\t88(%rsi),%rdi\n\n\tsarq\t$63,%rax\n\txorq\t%rsi,%rsi\n\tsubq\t%rax,%rsi\n\n\txorq\t%rax,%rdx\n\taddq\t%rsi,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\txorq\t%rax,%r14\n\txorq\t%rax,%r15\n\txorq\t%rax,%rbx\n\txorq\t%rax,%rbp\n\txorq\t%rax,%rcx\n\txorq\t%rdi,%rax\n\taddq\t%rsi,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%rbx\n\tadcq\t$0,%rbp\n\tadcq\t$0,%rcx\n\tadcq\t$0,%rax\n\n\tmulxq\t%r8,%r8,%rsi\n\tmulxq\t%r9,%r9,%rdi\n\taddq\t%rsi,%r9\n\tmulxq\t%r10,%r10,%rsi\n\tadcq\t%rdi,%r10\n\tmulxq\t%r11,%r11,%rdi\n\tadcq\t%rsi,%r11\n\tmulxq\t%r12,%r12,%rsi\n\tadcq\t%rdi,%r12\n\tmulxq\t%r13,%r13,%rdi\n\tadcq\t%rsi,%r13\n\tmulxq\t%r14,%r14,%rsi\n\tadcq\t%rdi,%r14\n\tmulxq\t%r15,%r15,%rdi\n\tadcq\t%rsi,%r15\n\tmulxq\t%rb
x,%rbx,%rsi\n\tadcq\t%rdi,%rbx\n\tmulxq\t%rbp,%rbp,%rdi\n\tadcq\t%rsi,%rbp\n\tmulxq\t%rcx,%rcx,%rsi\n\tadcq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rsi\n\timulq\t%rdx\n\taddq\t%rsi,%rax\n\tadcq\t$0,%rdx\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%rsi\n\tadcq\t%rsi,%r15\n\tadcq\t%rsi,%rbx\n\tadcq\t%rsi,%rbp\n\tadcq\t%rsi,%rcx\n\tadcq\t%rsi,%rax\n\tadcq\t%rsi,%rdx\n\n\tmovq\t16(%rsp),%rsi\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%rbx,64(%rdi)\n\tmovq\t%rbp,72(%rdi)\n\tmovq\t%rcx,80(%rdi)\n\tmovq\t%rax,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulx_384x63:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\tmovq\t0+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tmovq\t%rcx,%rdx\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,%r
15\n\tmovq\t%r14,%rbx\n\tmovq\t56+0(%rsi),%r8\n\tmovq\t56+8(%rsi),%r9\n\tmovq\t56+16(%rsi),%r10\n\tmovq\t56+24(%rsi),%r11\n\tmovq\t56+32(%rsi),%r12\n\tmovq\t56+40(%rsi),%r13\n\tmovq\t56+48(%rsi),%r14\n\n\tmovq\t%rdx,%rbp\n\tsarq\t$63,%rbp\n\txorq\t%rax,%rax\n\tsubq\t%rbp,%rax\n\n\txorq\t%rbp,%rdx\n\taddq\t%rax,%rdx\n\n\txorq\t%rbp,%r8\n\txorq\t%rbp,%r9\n\txorq\t%rbp,%r10\n\txorq\t%rbp,%r11\n\txorq\t%rbp,%r12\n\txorq\t%rbp,%r13\n\txorq\t%rbp,%r14\n\taddq\t%rax,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\tadcq\t$0,%r14\n\n\tandq\t%rdx,%r14\n\tnegq\t%r14\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%rax\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%rax,%r10\n\tmulxq\t%r11,%r11,%rax\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%rax,%r12\n\tmulxq\t%r13,%r13,%rax\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t%r15,%r13\n\tadcq\t%rbx,%r14\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,48(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulx_384_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\tmovq\t0+24(%rsi),%r11\n\tmovq\t0+32(%rsi),%r12\n\tmovq\t0+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\t
mulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\tmovq\t%rcx,%rdx\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%r14,%r15\n\tmovq\t48+0(%rsi),%r8\n\tmovq\t48+8(%rsi),%r9\n\tmovq\t48+16(%rsi),%r10\n\tmovq\t48+24(%rsi),%r11\n\tmovq\t48+32(%rsi),%r12\n\tmovq\t48+40(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%rax,%r10\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%rax,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tandq\t%rdx,%rax\n\tnegq\t%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r14\n\taddq\t%rbp,%r9\n\tmulxq\t%r10,%r10,%rbp\n\tadcq\t%r14,%r10\n\tmulxq\t%r11,%r11,%r14\n\tadcq\t%rbp,%r11\n\tmulxq\t%r12,%r12,%rbp\n\tadcq\t%r14,%r12\n\tmulxq\t%r13,%r13,%r14\n\tadcq\t%rbp,%r13\n\tadcq\t%rax,%r14\n\n\taddq\t0(%rdi),%r8\n\tadcq\t8(%rdi),%r9\n\tadcq\t16(%rdi),%r10\n\tadcq\t24(%rdi),%r11\n\tadcq\t32(%rdi),%r12\n\tadcq\t40(%rdi),%r13\n\tadcq\t%r15,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r9,%r8\n\tshrdq\t$31,%r10,%r9\n\tshrdq\t$31,%r11,%r10\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r8\n\txorq\t%r14,%r9\n\txorq\t%r14,%r10\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\tadcq\t$0,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t
%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__smulx_191_n_shift_by_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rdx,%rbx\n\tmovq\t0+0(%rsi),%r8\n\tmovq\t0+8(%rsi),%r9\n\tmovq\t0+16(%rsi),%r10\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r8\n\txorq\t%rax,%r9\n\txorq\t%r10,%rax\n\taddq\t%rbp,%r8\n\tadcq\t$0,%r9\n\tadcq\t$0,%rax\n\n\tmulxq\t%r8,%r8,%rbp\n\tmulxq\t%r9,%r9,%r10\n\taddq\t%rbp,%r9\n\tadcq\t$0,%r10\n\timulq\t%rdx\n\taddq\t%rax,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\tmovq\t%rcx,%rdx\n\tmovq\t48+0(%rsi),%r11\n\tmovq\t48+8(%rsi),%r12\n\tmovq\t48+16(%rsi),%r13\n\n\tmovq\t%rdx,%rax\n\tsarq\t$63,%rax\n\txorq\t%rbp,%rbp\n\tsubq\t%rax,%rbp\n\n\txorq\t%rax,%rdx\n\taddq\t%rbp,%rdx\n\n\txorq\t%rax,%r11\n\txorq\t%rax,%r12\n\txorq\t%r13,%rax\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%rax\n\n\tmulxq\t%r11,%r11,%rbp\n\tmulxq\t%r12,%r12,%r13\n\taddq\t%rbp,%r12\n\tadcq\t$0,%r13\n\timulq\t%rdx\n\taddq\t%rax,%r13\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t%r9,%r12\n\tadcq\t%r10,%r13\n\tadcq\t%rdx,%r14\n\tmovq\t%rbx,%rdx\n\n\tshrdq\t$31,%r12,%r11\n\tshrdq\t$31,%r13,%r12\n\tshrdq\t$31,%r14,%r13\n\n\tsarq\t$63,%r14\n\txorq\t%rbp,%rbp\n\tsubq\t%r14,%rbp\n\n\txorq\t%r14,%r11\n\txorq\t%r14,%r12\n\txorq\t%r14,%r13\n\taddq\t%rbp,%r11\n\tadcq\t$0,%r12\n\tadcq\t$0,%r13\n\n\tmovq\t%r11,0(%rdi)\n\tmovq\t%r12,8(%rdi)\n\tmovq\t%r13,16(%rdi)\n\n\txorq\t%r14,%rdx\n\txorq\t%r14,%rcx\n\taddq\t%rbp,%rdx\n\taddq\t%rbp,%rcx\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__ab_approximation_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t40(%rsi),%r9\n\tmovq\t88(%rsi),%r11\n\tmovq\t32(%rsi),%rbx\n\tmovq\t80(%rsi),%rbp\n\tmovq\t24(%rsi),%r8\n\tmovq\t72(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx
,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t16(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t64(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t8(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t56(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tmovq\t0(%rsi),%r8\n\tcmovzq\t%r10,%rbp\n\tmovq\t48(%rsi),%r10\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tcmovzq\t%rbx,%r9\n\tcmovzq\t%rbp,%r11\n\tcmovzq\t%r8,%rbx\n\tcmovzq\t%r10,%rbp\n\n\tmovq\t%r9,%rax\n\torq\t%r11,%rax\n\tbsrq\t%rax,%rcx\n\tleaq\t1(%rcx),%rcx\n\tcmovzq\t%r8,%r9\n\tcmovzq\t%r10,%r11\n\tcmovzq\t%rax,%rcx\n\tnegq\t%rcx\n\n\n\tshldq\t%cl,%rbx,%r9\n\tshldq\t%cl,%rbp,%r11\n\n\tmovl\t$0x7FFFFFFF,%eax\n\tandq\t%rax,%r8\n\tandq\t%rax,%r10\n\tandnq\t%r9,%rax,%r9\n\tandnq\t%r11,%rax,%r11\n\torq\t%r9,%r8\n\torq\t%r11,%r10\n\n\tjmp\t__inner_loop_31\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.p2align\t5\n__inner_loop_31:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$0x7FFFFFFF80000000,%rcx\n\tmovq\t$0x800000007FFFFFFF,%r13\n\tmovq\t$0x7FFFFFFF7FFFFFFF,%r15\n\nL$oop_31:\n\tcmpq\t%r10,%r8\n\tmovq\t%r8,%rax\n\tmovq\t%r10,%rbx\n\tmovq\t%rcx,%rbp\n\tmovq\t%r13,%r14\n\tcmovbq\t%r10,%r8\n\tcmovbq\t%rax,%r10\n\tcmovbq\t%r13,%rcx\n\tcmovbq\t%rbp,%r13\n\n\tsubq\t%r10,%r8\n\tsubq\t%r13,%rcx\n\taddq\t%r15,%rcx\n\n\ttestq\t$1,%rax\n\tcmovzq\t%rax,%r8\n\tcmovzq\t%rbx,%r10\n\tcmovzq\t%rbp,%rcx\n\tcmovzq\t%r14,%r13\n\n\tshrq\t$1,%r8\n\taddq\t%r13,%r13\n\tsubq\t%r15,%r13\n\tsubl\t$1,%edi\n\tjnz\tL$oop_31\n\n\tshrq\t$32,%r15\n\tmovl\t%ecx,%edx\n\tmovl\t%r13d,%r12d\n\tshrq\t$32,%rcx\n\tshrq\t$32,%r13\n\tsubq\t%r15,%rdx\n\tsubq\t%r15,%rcx\n\tsubq\t%r15,%r12\n\tsubq\t%r15,%r13\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n
\n\n.p2align\t5\n__tail_loop_55:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t$1,%rdx\n\txorq\t%rcx,%rcx\n\txorq\t%r12,%r12\n\tmovq\t$1,%r13\n\nL$oop_55:\n\txorq\t%rax,%rax\n\ttestq\t$1,%r8\n\tmovq\t%r10,%rbx\n\tcmovnzq\t%r10,%rax\n\tsubq\t%r8,%rbx\n\tmovq\t%r8,%rbp\n\tsubq\t%rax,%r8\n\tcmovcq\t%rbx,%r8\n\tcmovcq\t%rbp,%r10\n\tmovq\t%rdx,%rax\n\tcmovcq\t%r12,%rdx\n\tcmovcq\t%rax,%r12\n\tmovq\t%rcx,%rbx\n\tcmovcq\t%r13,%rcx\n\tcmovcq\t%rbx,%r13\n\txorq\t%rax,%rax\n\txorq\t%rbx,%rbx\n\tshrq\t$1,%r8\n\ttestq\t$1,%rbp\n\tcmovnzq\t%r12,%rax\n\tcmovnzq\t%r13,%rbx\n\taddq\t%r12,%r12\n\taddq\t%r13,%r13\n\tsubq\t%rax,%rdx\n\tsubq\t%rbx,%rcx\n\tsubl\t$1,%edi\n\tjnz\tL$oop_55\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%r8\n\tlfence\n\tjmpq\t*%r8\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/div3w-armv8.S",
    "content": ".text\n\n.globl\t_div_3_limbs\n.private_extern\t_div_3_limbs\n\n.align\t5\n_div_3_limbs:\n\thint\t#34\n\tldp\tx4,x5,[x0]\t// load R\n\teor\tx0,x0,x0\t// Q = 0\n\tmov\tx3,#64\t\t// loop counter\n\tnop\n\nLoop:\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tcsel\tx4,x4,x6,lo\t// select between R and R - D\n\textr\tx1,x2,x1,#1\t// D >>= 1\n\tcsel\tx5,x5,x7,lo\n\tlsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\tsub\tx3,x3,#1\n\tcbnz\tx3,Loop\n\n\tasr\tx3,x0,#63\t// top bit -> mask\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\n\torr\tx0,x0,x3\t// all ones if overflow\n\n\tret\n\n.globl\t_quot_rem_128\n.private_extern\t_quot_rem_128\n\n.align\t5\n_quot_rem_128:\n\thint\t#34\n\tldp\tx3,x4,[x1]\n\n\tmul\tx5,x3,x2\t// divisor[0:1} * quotient\n\tumulh\tx6,x3,x2\n\tmul\tx11,  x4,x2\n\tumulh\tx7,x4,x2\n\n\tldp\tx8,x9,[x0]\t// load 3 limbs of the dividend\n\tldr\tx10,[x0,#16]\n\n\tadds\tx6,x6,x11\n\tadc\tx7,x7,xzr\n\n\tsubs\tx8,x8,x5\t// dividend - divisor * quotient\n\tsbcs\tx9,x9,x6\n\tsbcs\tx10,x10,x7\n\tsbc\tx5,xzr,xzr\t\t// borrow -> mask\n\n\tadd\tx2,x2,x5\t// if borrowed, adjust the quotient ...\n\tand\tx3,x3,x5\n\tand\tx4,x4,x5\n\tadds\tx8,x8,x3\t// ... and add divisor\n\tadc\tx9,x9,x4\n\n\tstp\tx8,x9,[x0]\t// save 2 limbs of the remainder\n\tstr\tx2,[x0,#16]\t// and one limb of the quotient\n\n\tmov\tx0,x2\t\t// return adjusted quotient\n\n\tret\n\n\n.globl\t_quot_rem_64\n.private_extern\t_quot_rem_64\n\n.align\t5\n_quot_rem_64:\n\thint\t#34\n\tldr\tx3,[x1]\n\tldr\tx8,[x0]\t// load 1 limb of the dividend\n\n\tmul\tx5,x3,x2\t// divisor * quotient\n\n\tsub\tx8,x8,x5\t// dividend - divisor * quotient\n\n\tstp\tx8,x2,[x0]\t// save remainder and quotient\n\n\tmov\tx0,x2\t\t// return quotient\n\n\tret\n\n"
  },
  {
    "path": "build/mach-o/div3w-x86_64.s",
    "content": ".text\t\n\n.globl\t_div_3_limbs\n.private_extern\t_div_3_limbs\n\n.p2align\t5\n_div_3_limbs:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\txorq\t%rax,%rax\n\tmovl\t$64,%ecx\n\nL$oop:\n\tmovq\t%r8,%r10\n\tsubq\t%rsi,%r8\n\tmovq\t%r9,%r11\n\tsbbq\t%rdx,%r9\n\tleaq\t1(%rax,%rax,1),%rax\n\tmovq\t%rdx,%rdi\n\tcmovcq\t%r10,%r8\n\tcmovcq\t%r11,%r9\n\tsbbq\t$0,%rax\n\tshlq\t$63,%rdi\n\tshrq\t$1,%rsi\n\tshrq\t$1,%rdx\n\torq\t%rdi,%rsi\n\tsubl\t$1,%ecx\n\tjnz\tL$oop\n\n\tleaq\t1(%rax,%rax,1),%rcx\n\tsarq\t$63,%rax\n\n\tsubq\t%rsi,%r8\n\tsbbq\t%rdx,%r9\n\tsbbq\t$0,%rcx\n\n\torq\t%rcx,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_quot_rem_128\n.private_extern\t_quot_rem_128\n\n.p2align\t5\n_quot_rem_128:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t0(%rsi)\n\tmovq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r9\n\tadcq\t$0,%rdx\n\n\tmovq\t0(%rdi),%r10\n\tmovq\t8(%rdi),%r11\n\tmovq\t16(%rdi),%rax\n\n\tsubq\t%r8,%r10\n\tsbbq\t%r9,%r11\n\tsbbq\t%rdx,%rax\n\tsbbq\t%r8,%r8\n\n\taddq\t%r8,%rcx\n\tmovq\t%r8,%r9\n\tandq\t0(%rsi),%r8\n\tandq\t8(%rsi),%r9\n\taddq\t%r8,%r10\n\tadcq\t%r9,%r11\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%r11,8(%rdi)\n\tmovq\t%rcx,16(%rdi)\n\n\tmovq\t%rcx,%rax\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n\n\n\n.globl\t_quot_rem_64\n.private_extern\t_quot_rem_64\n\n.p2align\t5\n_quot_rem_64:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t%rdx,%rax\n\timulq\t0(%rsi),%rdx\n\n\tmovq\t0(%rdi),%r10\n\n\tsubq\t%rdx,%r10\n\n\tmovq\t%r10,0(%rdi)\n\tmovq\t%rax,8(%r
di)\n\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n"
  },
  {
    "path": "build/mach-o/mul_mont_256-armv8.S",
    "content": ".text\n\n.globl\t_mul_mont_sparse_256\n.private_extern\t_mul_mont_sparse_256\n\n.align\t5\n_mul_mont_sparse_256:\n\thint\t#34\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldr\tx9,        [x2]\n\tldp\tx12,x13,[x1,#16]\n\n\tmul\tx19,x10,x9\n\tldp\tx5,x6,[x3]\n\tmul\tx20,x11,x9\n\tldp\tx7,x8,[x3,#16]\n\tmul\tx21,x12,x9\n\tmul\tx22,x13,x9\n\n\tumulh\tx14,x10,x9\n\tumulh\tx15,x11,x9\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tumulh\tx17,x13,x9\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,xzr,    x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*1]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*2]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\t
umulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*3]\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\t//mul\tx14,x5,x3\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tsubs\txzr,x19,#1\t\t//adds\tx19,x19,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tadcs\tx20,x21,x15\n\tadcs\tx21,x22,x16\n\tadcs\tx22,x23,x17\n\tadc\tx23,xzr,xzr\n\n\tsubs\tx14,x19,x5\n\tsbcs\tx15,x20,x6\n\tsbcs\tx16,x21,x7\n\tsbcs\tx17,x22,x8\n\tsbcs\txzr,    
x23,xzr\n\n\tcsel\tx19,x19,x14,lo\n\tcsel\tx20,x20,x15,lo\n\tcsel\tx21,x21,x16,lo\n\tcsel\tx22,x22,x17,lo\n\n\tstp\tx19,x20,[x0]\n\tstp\tx21,x22,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\tret\n\n.globl\t_sqr_mont_sparse_256\n.private_extern\t_sqr_mont_sparse_256\n\n.align\t5\n_sqr_mont_sparse_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tmov\tx4,x3\n\n\t////////////////////////////////////////////////////////////////\n\t//  |  |  |  |  |  |a1*a0|  |\n\t//  |  |  |  |  |a2*a0|  |  |\n\t//  |  |a3*a2|a3*a0|  |  |  |\n\t//  |  |  |  |a2*a1|  |  |  |\n\t//  |  |  |a3*a1|  |  |  |  |\n\t// *|  |  |  |  |  |  |  | 2|\n\t// +|a3*a3|a2*a2|a1*a1|a0*a0|\n\t//  |--+--+--+--+--+--+--+--|\n\t//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is x10\n\t//\n\t//  \"can't overflow\" below mark carrying into high part of\n\t//  multiplication result, which can't overflow, because it\n\t//  can never be all ones.\n\n\tmul\tx11,x6,x5\t// a[1]*a[0]\n\tumulh\tx15,x6,x5\n\tmul\tx12,x7,x5\t// a[2]*a[0]\n\tumulh\tx16,x7,x5\n\tmul\tx13,x8,x5\t// a[3]*a[0]\n\tumulh\tx19,x8,x5\n\n\tadds\tx12,x12,x15\t// accumulate high parts of multiplication\n\tmul\tx14,x7,x6\t// a[2]*a[1]\n\tumulh\tx15,x7,x6\n\tadcs\tx13,x13,x16\n\tmul\tx16,x8,x6\t// a[3]*a[1]\n\tumulh\tx17,x8,x6\n\tadc\tx19,x19,xzr\t// can't overflow\n\n\tmul\tx20,x8,x7\t// a[3]*a[2]\n\tumulh\tx21,x8,x7\n\n\tadds\tx15,x15,x16\t// accumulate high parts of multiplication\n\tmul\tx10,x5,x5\t// a[0]*a[0]\n\tadc\tx16,x17,xzr\t// can't overflow\n\n\tadds\tx13,x13,x14\t// accumulate low parts of multiplication\n\tumulh\tx5,x5,x5\n\tadcs\tx19,x19,x15\n\tmul\tx15,x6,x6\t// a[1]*a[1]\n\tadcs\tx20,x20,x16\n\tumulh\tx6,x6,x6\n\tadc\tx21,x21,xzr\t// can't 
overflow\n\n\tadds\tx11,x11,x11\t// acc[1-6]*=2\n\tmul\tx16,x7,x7\t// a[2]*a[2]\n\tadcs\tx12,x12,x12\n\tumulh\tx7,x7,x7\n\tadcs\tx13,x13,x13\n\tmul\tx17,x8,x8\t// a[3]*a[3]\n\tadcs\tx19,x19,x19\n\tumulh\tx8,x8,x8\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadc\tx22,xzr,xzr\n\n\tadds\tx11,x11,x5\t// +a[i]*a[i]\n\tadcs\tx12,x12,x15\n\tadcs\tx13,x13,x6\n\tadcs\tx19,x19,x16\n\tadcs\tx20,x20,x7\n\tadcs\tx21,x21,x17\n\tadc\tx22,x22,x8\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadds\tx10,x10,x19\t// accumulate upper half\n\tadcs\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadc\tx19,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x19,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n.globl\t_from_mont_256\n.private_extern\t_from_mont_256\n\n.align\t5\n_from_mont_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_redc_mont_256\n.private_extern\t_redc_mont_256\n\n.align\t5\n_redc_mont_256:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx16,x17,[x1,#48]\n\n\tadds\tx10,x10,x14\n\tadcs\tx1
1,x11,x15\n\tadcs\tx12,x12,x16\n\tadcs\tx13,x13,x17\n\tadc\tx9,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x9,xzr\n\n\tcsel\tx10,x10,x14,lo\n\tcsel\tx11,x11,x15,lo\n\tcsel\tx12,x12,x16,lo\n\tcsel\tx13,x13,x17,lo\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__mul_by_1_mont_256:\n\tmul\tx3,x4,x10\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\t//mul\tx14,x5,x3\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\t\t//adds\tx10,x10,x14\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tadc\tx13,x9,x17\n\n\tret\n\
n"
  },
  {
    "path": "build/mach-o/mul_mont_384-armv8.S",
    "content": ".text\n\n.globl\t_add_mod_384x384\n.private_extern\t_add_mod_384x384\n\n.align\t5\n_add_mod_384x384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__add_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tadcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tadcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tadcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tstp\tx11,x12,[x0,#48]\n\tcsel\tx15,x15,x23,lo\n\tstp\tx13,x14,[x0,#64]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\n\n.globl\t_sub_mod_384x384\n.private_extern\t_sub_mod_384x384\n\n.align\t5\n_sub_mod_384x384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__sub_mod_384x384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tsbcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tsbcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tsbcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#80]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\n\n\n.align\t5\n__add_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tstp\tx11,x12,[x0]\n\tcsel\tx16,x16,x24,lo\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n\n.align\t5\n__sub_mod_384:\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n.globl\t_mul_mont_384x\n.private_extern\t_mul_mont_384x\n\n.align\t5\n_mul_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#288\t\t// space for 3 768-bit vectors\n\n\tmov\tx26,x0\t\t// save r_ptr\n\tmov\tx27,x1\t\t// save a_ptr\n\tmov\tx28,x2\t\t// save b_ptr\n\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tadd\tx1,x1,#48\n\tadd\tx2,x2,#48\n\tadd\tx0,sp,#96\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tsub\tx2,x1,#48\n\tadd\tx0,sp,#240\n\tbl\t__add_mod_384\n\n\tadd\tx1,x28,#0\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#192\n\tbl\t__add_mod_384\n\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\t\t// mul_384(t2, a->re+a->im, b->re+b->im)\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,x0\n\tadd\tx2,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,sp,#96\n\tbl\t__sub_mod_384x384\t// t2 = t2-t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#96\n\tadd\tx0,sp,#0\n\tbl\t__sub_mod_384x384\t// t0 = 
t0-t1\n\n\tadd\tx1,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tadd\tx1,sp,#192\n\tadd\tx0,x0,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#288\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sqr_mont_384x\n.private_extern\t_sqr_mont_384x\n\n.align\t5\n_sqr_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 2 384-bit vectors\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tadd\tx0,sp,#0\n\tbl\t__add_mod_384\t\t// t0 = a->re + a->im\n\n\tadd\tx0,sp,#48\n\tbl\t__sub_mod_384\t\t// t1 = a->re - a->im\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx11,x11,x11\t// add with itself\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx19,x11,x19,lo\n\tcsel\tx20,x12,x20,lo\n\tcsel\tx21,x13,x21,lo\n\tldp\tx11,x12,[sp]\n\tcsel\tx22,x14,x22,lo\n\tldr\tx17,        
[sp,#48]\n\tcsel\tx23,x15,x23,lo\n\tldp\tx13,x14,[sp,#16]\n\tcsel\tx24,x16,x24,lo\n\tldp\tx15,x16,[sp,#32]\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_mul_mont_384\n.private_extern\t_mul_mont_384\n\n.align\t5\n_mul_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        
[x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__mul_mont_384:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    x3\n\tmul\tx3,x10,x4\n\tmov\tx17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*1]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*2]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*3]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*4]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// 
adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*5]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\t// mul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\t\t// adds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t// pull r_ptr\n\tadc\tx17,x17,xzr\n\n\tadds\tx19,x20,x26\n\tadcs\tx20,x21,x27\n\tadcs\tx21,x22,x28\n\tadcs\tx22,x23,x0\n\tadcs\tx23,x24,x1\n\tadcs\tx24,x25,x3\n\tadc\tx25,x17,xzr\n\n\tsubs\tx26,x19,x5\n\tsbcs\tx27,x20,x6\n\tsbcs\tx28,x21,x7\n\tsbcs\tx0,x22,x8\n\tsbcs\tx1,x23,x9\n\tsbcs\tx3,x24,x10\n\tsbcs\txzr,    
x25,xzr\n\n\tcsel\tx11,x19,x26,lo\n\tcsel\tx12,x20,x27,lo\n\tcsel\tx13,x21,x28,lo\n\tcsel\tx14,x22,x0,lo\n\tcsel\tx15,x23,x1,lo\n\tcsel\tx16,x24,x3,lo\n\tret\n\n\n.globl\t_sqr_mont_384\n.private_extern\t_sqr_mont_384\n\n.align\t5\n_sqr_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tmov\tx3,x0\t\t// save r_ptr\n\tmov\tx0,sp\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tmov\tx1,sp\n\tmov\tx0,x3\t\t// restore r_ptr\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sqr_n_mul_mont_383\n.private_extern\t_sqr_n_mul_mont_383\n\n.align\t5\n_sqr_n_mul_mont_383:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#96\t\t// space for 768-bit vector\n\tmov\tx17,x5\t\t\t// save b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\tmov\tx0,sp\nLoop_sqr_383:\n\tbl\t__sqr_384\n\tsub\tx2,x2,#1\t// 
counter\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,sp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// just accumulate upper half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tcbnz\tx2,Loop_sqr_383\n\n\tmov\tx2,x17\n\tldr\tx17,[x17]\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.align\t5\n__sqr_384:\n\tmul\tx19,x12,x11\n\tmul\tx20,x13,x11\n\tmul\tx21,x14,x11\n\tmul\tx22,x15,x11\n\tmul\tx23,x16,x11\n\n\tumulh\tx6,x12,x11\n\tumulh\tx7,x13,x11\n\tumulh\tx8,x14,x11\n\tumulh\tx9,x15,x11\n\tadds\tx20,x20,x6\n\tumulh\tx10,x16,x11\n\tadcs\tx21,x21,x7\n\tmul\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tmul\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tmul\tx9,x15,x12\n\tadc\tx24,xzr,    x10\n\tmul\tx10,x16,x12\n\n\tadds\tx21,x21,x7\n\tumulh\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x12\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x12\n\tadc\tx25,xzr,xzr\n\n\tmul\tx5,x11,x11\n\tadds\tx22,x22,x7\n\tumulh\tx11,  x11,x11\n\tadcs\tx23,x23,x8\n\tmul\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tmul\tx9,x15,x13\n\tadc\tx25,x25,x10\n\tmul\tx10,x16,x13\n\n\tadds\tx23,x23,x8\n\tumulh\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tumulh\tx9,x15,x13\n\tadcs\tx25,x25,x10\n\tumulh\tx10,x16,x13\n\tadc\tx26,xzr,xzr\n\n\tmul\tx6,x12,x12\n\tadds\tx24,x24,x8\n\tumulh\tx12,  
x12,x12\n\tadcs\tx25,x25,x9\n\tmul\tx9,x15,x14\n\tadc\tx26,x26,x10\n\tmul\tx10,x16,x14\n\n\tadds\tx25,x25,x9\n\tumulh\tx9,x15,x14\n\tadcs\tx26,x26,x10\n\tumulh\tx10,x16,x14\n\tadc\tx27,xzr,xzr\n\tmul\tx7,x13,x13\n\tadds\tx26,x26,x9\n\tumulh\tx13,  x13,x13\n\tadc\tx27,x27,x10\n\tmul\tx8,x14,x14\n\n\tmul\tx10,x16,x15\n\tumulh\tx14,  x14,x14\n\tadds\tx27,x27,x10\n\tumulh\tx10,x16,x15\n\tmul\tx9,x15,x15\n\tadc\tx28,x10,xzr\n\n\tadds\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadcs\tx22,x22,x22\n\tadcs\tx23,x23,x23\n\tadcs\tx24,x24,x24\n\tadcs\tx25,x25,x25\n\tadcs\tx26,x26,x26\n\tumulh\tx15,  x15,x15\n\tadcs\tx27,x27,x27\n\tmul\tx10,x16,x16\n\tadcs\tx28,x28,x28\n\tumulh\tx16,  x16,x16\n\tadc\tx1,xzr,xzr\n\n\tadds\tx19,x19,x11\n\tadcs\tx20,x20,x6\n\tadcs\tx21,x21,x12\n\tadcs\tx22,x22,x7\n\tadcs\tx23,x23,x13\n\tadcs\tx24,x24,x8\n\tadcs\tx25,x25,x14\n\tstp\tx5,x19,[x0]\n\tadcs\tx26,x26,x9\n\tstp\tx20,x21,[x0,#16]\n\tadcs\tx27,x27,x15\n\tstp\tx22,x23,[x0,#32]\n\tadcs\tx28,x28,x10\n\tstp\tx24,x25,[x0,#48]\n\tadc\tx16,x16,x1\n\tstp\tx26,x27,[x0,#64]\n\tstp\tx28,x16,[x0,#80]\n\n\tret\n\n.globl\t_sqr_384\n.private_extern\t_sqr_384\n\n.align\t5\n_sqr_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_redc_mont_384\n.private_extern\t_redc_mont_384\n\n.align\t5\n_redc_mont_384:\n\thint\t#25\n\tstp\tx29,x30,
[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_from_mont_384\n.private_extern\t_from_mont_384\n\n.align\t5\n_from_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\t\t// adjust for missing 
b_ptr\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__mul_by_1_mont_384:\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tmul\tx26,x4,x11\n\tldp\tx15,x16,[x1,#32]\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// 
adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\t// mul\tx19,x5,x26\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\t\t// adds\tx19,x19,x11\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\tret\n\n\n\n.align\t5\n__redc_tail_mont_384:\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\t// accumulate upper 
half\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsel\tx11,x11,x19,lo\n\tcsel\tx12,x12,x20,lo\n\tcsel\tx13,x13,x21,lo\n\tcsel\tx14,x14,x22,lo\n\tcsel\tx15,x15,x23,lo\n\tcsel\tx16,x16,x24,lo\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\n\n.globl\t_mul_384\n.private_extern\t_mul_384\n\n.align\t5\n_mul_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__mul_384:\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\n\tumulh\tx5,x11,x17\n\tumulh\tx6,x12,x17\n\tumulh\tx7,x13,x17\n\tumulh\tx8,x14,x17\n\tumulh\tx9,x15,x17\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,8*1]\n\n\tstr\tx19,[x0]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,xzr,    
x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(1+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*1]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(2+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*2]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(3+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*3]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(4+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*4]\n\t
adds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*5]\n\tadds\tx19,x20,x5\n\tadcs\tx20,x21,x6\n\tadcs\tx21,x22,x7\n\tadcs\tx22,x23,x8\n\tadcs\tx23,x24,x9\n\tadc\tx24,x25,x10\n\n\tstp\tx19,x20,[x0,#48]\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tret\n\n\n.globl\t_mul_382x\n.private_extern\t_mul_382x\n\n.align\t5\n_mul_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\t\t// space for two 384-bit vectors\n\n\tldp\tx11,x12,[x1]\n\tmov\tx26,x0\t\t// save r_ptr\n\tldp\tx19,x20,[x1,#48]\n\tmov\tx27,x1\t\t// save a_ptr\n\tldp\tx13,x14,[x1,#16]\n\tmov\tx28,x2\t\t// save b_ptr\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx15,x16,[x1,#32]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx6,x12,x20\n\tldp\tx11,x12,[x2]\n\tadcs\tx7,x13,x21\n\tldp\tx19,x20,[x2,#48]\n\tadcs\tx8,x14,x22\n\tldp\tx13,x14,[x2,#16]\n\tadcs\tx9,x15,x23\n\tldp\tx21,x22,[x2,#64]\n\tadc\tx10,x16,x24\n\tldp\tx15,x16,[x2,#32]\n\n\tstp\tx5,x6,[sp]\n\tadds\tx5,x11,x19\t// t1 = b->re + b->im\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx6,x12,x20\n\tstp\tx7,x8,[sp,#16]\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tstp\tx9,x10,[sp,#32]\n\tadcs\tx9,x15,x23\n\tstp\tx5,x6,[sp,#48]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[sp,#64]\n\tstp\tx9,x10,[sp,#80]\n\n\tbl\t__mul_384\t\t// 
_mul_384(ret->re, a->re, b->re)\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#48\n\tadd\tx0,x26,#96\n\tbl\t__mul_384\n\n\tadd\tx1,x27,#48\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tadd\tx1,x26,#96\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,x26,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,x26,#0\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sqr_382x\n.private_extern\t_sqr_382x\n\n.align\t5\n_sqr_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx13,x14,[x1,#16]\n\tadds\tx5,x11,x19\t// t0 = a->re + a->im\n\tldp\tx21,x22,[x1,#64]\n\tadcs\tx6,x12,x20\n\tldp\tx15,x16,[x1,#32]\n\tadcs\tx7,x13,x21\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx8,x14,x22\n\tstp\tx5,x6,[x0]\n\tadcs\tx9,x15,x23\n\tldp\tx5,x6,[x2]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[x0,#16]\n\n\tsubs\tx11,x11,x19\t// t1 = a->re - 
a->im\n\tldp\tx7,x8,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tstp\tx9,x10,[x0,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx9,x10,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x25\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x25\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x25\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x25\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tmov\tx4,x1\t\t// save a_ptr\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\n\n\tadd\tx1,x4,#0\n\tadd\tx2,x4,#48\n\tadd\tx0,x0,#96\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x0]\n\tldp\tx13,x14,[x0,#16]\n\tadds\tx11,x11,x11\t// add with itself\n\tldp\tx15,x16,[x0,#32]\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadcs\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tstp\tx11,x12,[x0]\n\tadcs\tx21,x21,x21\n\tstp\tx13,x14,[x0,#16]\n\tadcs\tx22,x22,x22\n\tstp\tx15,x16,[x0,#32]\n\tadcs\tx23,x23,x23\n\tstp\tx19,x20,[x0,#48]\n\tadc\tx24,x24,x24\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sqr_mont_382x\n.private_extern\t_sqr_mont_382x\n\n.align\t5\n_sqr_mont_382x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tsp,sp,#112\t\t// space for two 384-bit 
vectors + word\n\tmov\tx4,x3\t\t// adjust for missing b_ptr\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx17,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx5,x11,x17\t// t0 = a->re + a->im\n\tadcs\tx6,x12,x20\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tadcs\tx9,x15,x23\n\tadc\tx10,x16,x24\n\n\tsubs\tx19,x11,x17\t// t1 = a->re - a->im\n\tsbcs\tx20,x12,x20\n\tsbcs\tx21,x13,x21\n\tsbcs\tx22,x14,x22\n\tsbcs\tx23,x15,x23\n\tsbcs\tx24,x16,x24\n\tsbc\tx25,xzr,xzr\t\t// borrow flag as mask\n\n\tstp\tx5,x6,[sp]\n\tstp\tx7,x8,[sp,#16]\n\tstp\tx9,x10,[sp,#32]\n\tstp\tx19,x20,[sp,#48]\n\tstp\tx21,x22,[sp,#64]\n\tstp\tx23,x24,[sp,#80]\n\tstr\tx25,[sp,#96]\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tbl\t__mul_mont_383_nonred\t// _mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\tx19,x11,x11\t// add with itself\n\tadcs\tx20,x12,x12\n\tadcs\tx21,x13,x13\n\tadcs\tx22,x14,x14\n\tadcs\tx23,x15,x15\n\tadc\tx24,x16,x16\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tldp\tx11,x12,[sp]\n\tldr\tx17,[sp,#48]\n\tldp\tx13,x14,[sp,#16]\n\tldp\tx15,x16,[sp,#32]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_383_nonred\t// _mul_mont_384(ret->re, t0, t1)\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldr\tx25,[sp,#96]\t// account for sign from a->re - 
a->im\n\tldp\tx19,x20,[sp]\n\tldp\tx21,x22,[sp,#16]\n\tldp\tx23,x24,[sp,#32]\n\n\tand\tx19,x19,x25\n\tand\tx20,x20,x25\n\tand\tx21,x21,x25\n\tand\tx22,x22,x25\n\tand\tx23,x23,x25\n\tand\tx24,x24,x25\n\n\tsubs\tx11,x11,x19\n\tsbcs\tx12,x12,x20\n\tsbcs\tx13,x13,x21\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tand\tx21,x7,x25\n\tand\tx22,x8,x25\n\tand\tx23,x9,x25\n\tand\tx24,x10,x25\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#112\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n\n.align\t5\n__mul_mont_383_nonred:\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*1]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*2]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,x
zr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*3]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*4]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx
27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*5]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\t\t// pull 
r_ptr\n\n\tadds\tx11,x20,x26\n\tadcs\tx12,x21,x27\n\tadcs\tx13,x22,x28\n\tadcs\tx14,x23,x0\n\tadcs\tx15,x24,x1\n\tadcs\tx16,x25,x3\n\n\tret\n\n\n.globl\t_sgn0_pty_mont_384\n.private_extern\t_sgn0_pty_mont_384\n\n.align\t5\n_sgn0_pty_mont_384:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n\n.globl\t_sgn0_pty_mont_384x\n.private_extern\t_sgn0_pty_mont_384x\n\n.align\t5\n_sgn0_pty_mont_384x:\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tadd\tx1,x1,#48\n\n\tand\tx2,x11,#1\n\torr\tx3,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3
,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\torr\tx3,x3,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx2,x2,x17\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\torr\tx1,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\torr\tx1,x1,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tcmp\tx3,#0\n\tcsel\tx3,x0,x2,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\tx1,#0\n\tcsel\tx1,x0,x2,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\t\t// pack sign and parity\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\n"
  },
  {
    "path": "build/mach-o/mulq_mont_256-x86_64.s",
    "content": ".comm\t___blst_platform_cap,4\n.text\t\n\n.globl\t_mul_mont_sparse_256\n.private_extern\t_mul_mont_sparse_256\n\n.p2align\t5\n_mul_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$mul_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r13\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%rbp\n\tmovq\t%rdx,%rbx\n\n\tmovq\t%rax,%r15\n\tmulq\t%r13\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sqr_mont_sparse_256\n.private_extern\t_sqr_mont_sparse_256\n\n.p2align\t5\n_sqr_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_mont_sparse_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t%rcx,%r8\n\tmovq\t8(%rsi),%r14\n\tmovq\t%rdx,%rcx\n\tmovq\t16(%rsi),%r12\n\tleaq\t(%rsi),%rbx\n\tmovq\t24(%rsi),%rbp\n\n\tmovq\t%rax,%r15\n\tmulq\t%rax\n\tmovq\t%rax,%r9\n\tmovq\t%r15,%rax\n\tmovq\t%rdx,%r10\n\tcall\t__mulq_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulq_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulq\t%r14\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t%r12\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\txorq\t%r14,%r14\n\tmovq\t%rdx,%r13\n\n\tmovq\t%r9,%rdi\n\timulq\t%r8,%r9\n\n\n\tmovq\t%rax,%r15\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,
%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\txorq\t%r15,%r15\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r13\n\tadcq\t$0,%r14\n\tadcq\t$0,%r15\n\tmovq\t%r10,%rdi\n\timulq\t%r8,%r10\n\n\n\tmovq\t%rax,%r9\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\txorq\t%r9,%r9\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r14\n\tadcq\t$0,%r15\n\tadcq\t$0,%r9\n\tmovq\t%r11,%rdi\n\timulq\t%r8,%r11\n\n\n\tmovq\t%rax,%r10\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\
tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r9\n\txorq\t%r10,%r10\n\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rdi\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rdi\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdi,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r15\n\tadcq\t$0,%r9\n\tadcq\t$0,%r10\n\timulq\t%r8,%rax\n\tmovq\t8(%rsp),%rsi\n\n\n\tmovq\t%rax,%r11\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\tmovq\t%r14,%rbx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\n\n\n\tmovq\t%r15,%r12\n\tsubq\t0(%rcx),%r13\n\tsbbq\t8(%rcx),%r14\n\tsbbq\t16(%rcx),%r15\n\tmovq\t%r9,%rbp\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t$0,%r10\n\n\tcmovcq\t%rax,%r13\n\tcmovcq\t%rbx,%r14\n\tcmovcq\t%r12,%r15\n\tmovq\t%r13,0(%rsi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r14,8(%rsi)\n\tmovq\t%r15,16(%rsi)\n\tmovq\t%r9,24(%rsi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_from_mont_256\n.private_extern\t_from_mont_256\n\n.p2align\t5\n_from_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$from_mont_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r14,%r10\n\tmovq\t%r15,%r11\n\tmovq\t%r9,%r12\n\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tsbbq\t24(%rbx),%r9\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_redc_mont_256\n.private_extern\t_redc_mont_256\n\n.p2align\t5\n_redc_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$redc_mont_256$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_256\n\n\taddq\t32(%rsi),%r13\n\tadcq\t40(%rsi),%r14\n\tmovq\t%r13,%rax\n\tadcq\t48(%rsi),%r15\n\tmovq\t%r14,%r10\n\tadcq\t56(%rsi),%r9\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r15,%r11\n\tsubq\t0(%rbx),%r13\n\tsbbq\t8(%rbx),%r14\n\tsbbq\t16(%rbx),%r15\n\tmovq\t%r9,%r12\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r13,%rax\n\tcmovncq\t%r14,%r10\n\tcmovncq\t%r15,%r11\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r9,%r12\n\tmovq\t%r10,8(%rdi)\n\tmovq\t%r11,16(%rdi)\n\tmovq\t%r12,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulq_by_1_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r10\n\tmovq\t16(%rsi),%r11\n\tmovq\t24(%rsi),%r12\n\n\tmovq\t%rax,%r13\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r13\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t16(%rbx)\n\tmovq\t%r10,%r14\n\timulq\t%rcx,%r
10\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r13,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r9\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/mulq_mont_384-x86_64.s",
    "content": ".comm\t___blst_platform_cap,4\n.text\t\n\n\n\n\n\n\n\n\n.p2align\t5\n__subq_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__addq_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq\t%r12,%rbp\n\tsbb
q\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__subq_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subq_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_mul_mont_384x\n.private_extern\t_mul_mont_384x\n\n.p2align\t5\n_mul_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$mul_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$328,%rsp\n.cfi_adjust_cfa_offset\t328\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t48(%rsi),%rsi\n\tleaq\t40+96(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t-48(%rsi),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__addq_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulq_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n\tcall\t__subq_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subq_mod_384x384\n\n\tmovq\t%rcx,%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-328-8*6\n\n\t\n#ifdef\t__SGX_LVI_
HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sqr_mont_384x\n.private_extern\t_sqr_mont_384x\n\n.p2align\t5\n_sqr_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rsi,16(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subq_mod_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tcall\t__mulq_mont_384\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tmovq\t%r14,%r12\n\tadcq\t%r9,%r9\n\tmovq\t%r15,%r13\n\tadcq\t%r10,%r10\n\tmovq\t%r8,%rax\n\tadcq\t%r11,%r11\n\tmovq\t%r9,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r10,%rbp\n\tsbbq\t16(%rcx),%r8\n\tsbbq\t24(%rcx),%r9\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r12,%r14\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rax,%r8\n\tmovq\t%r14,48(%rdi)\n\tcmovcq\t%rbx,%r9\n\tmovq\t%r15,56(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,64(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%
r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_mul_382x\n.private_extern\t_mul_382x\n\n.p2align\t5\n_mul_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$mul_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r10\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r12\n\tmovq\t40(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(
%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulq_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tleaq\t48(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subq_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subq_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sqr_382x\n.private_extern\t_sqr_382x\n\n.p2align\t5\n_sqr_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rcx\n\n\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subq_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulq_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulq_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%
rdi)\n\tadcq\t%rbp,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*7\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_mul_384\n.private_extern\t_mul_384\n\n.p2align\t5\n_mul_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$mul_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_384\n\n\tmovq\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__mulq_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rax\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\tmovq\t%rax,0(%rdi)\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tm
ovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,8(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,16(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,24(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadc
q\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,32(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%rcx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rcx,40(%rdi)\n\tmovq\t%rdx,%rcx\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rax,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\tmovq\t%rcx,48(%rdi)\n\tmovq\t%r8,56(%rdi)\n\tmovq\t%r9,64(%rdi)\n\tmovq\t%r10,72(%rdi)\n\tmovq\t%r11,80(%rdi)\n\tmov
q\t%r12,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_sqr_384\n.private_extern\t_sqr_384\n\n.p2align\t5\n_sqr_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tcall\t__sqrq_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__sqrq_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rcx\n\tmovq\t24(%rsi),%rbx\n\n\n\tmovq\t%rax,%r14\n\tmulq\t%r15\n\tmovq\t%rax,%r9\n\tmovq\t%r14,%rax\n\tmovq\t32(%rsi),%rbp\n\tmovq\t%rdx,%r10\n\n\tmulq\t%rcx\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t40(%rsi),%rsi\n\tmovq\t%rdx,%r11\n\n\tmulq\t%rbx\n\taddq\t%rax,%r11\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t%rbp\n\taddq\t%rax,%r12\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t%rsi\n\taddq\t%rax,%r13\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t%rax\n\txorq\t%r8,%r8\n\tmovq\t%rax,0(%rdi)\n\tmovq
\t%r15,%rax\n\taddq\t%r9,%r9\n\tadcq\t$0,%r8\n\taddq\t%rdx,%r9\n\tadcq\t$0,%r8\n\tmovq\t%r9,8(%rdi)\n\n\tmulq\t%rcx\n\taddq\t%rax,%r11\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbx\n\taddq\t%rax,%r12\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rbp\n\taddq\t%rax,%r13\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t%rsi\n\taddq\t%rax,%r14\n\tmovq\t%r15,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t%rax\n\txorq\t%r9,%r9\n\taddq\t%rax,%r8\n\tmovq\t%rcx,%rax\n\taddq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%r9\n\taddq\t%r8,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r9\n\tmovq\t%r10,16(%rdi)\n\n\tmulq\t%rbx\n\taddq\t%rax,%r13\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rbp\n\taddq\t%rax,%r14\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%r15\n\tmovq\t%rcx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rcx\n\n\tmulq\t%rax\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%rbx,%rax\n\taddq\t%r12,%r12\n\tadcq\t%r13,%r13\n\tadcq\t$0,%r11\n\taddq\t%r9,%r12\n\tadcq\t%rdx,%r13\n\tadcq\t$0,%r11\n\tmovq\t%r12,32(%rdi)\n\n\n\tmulq\t%rbp\n\taddq\t%rax,%r15\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,40(%rdi)\n\tmovq\t%rdx,%r8\n\n\tmulq\t%rsi\n\taddq\t%rax,%rcx\n\tmovq\t%rbx,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%rcx\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbx\n\n\tmulq\t%rax\n\txorq\t%r12,%r12\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t$0,%r12\n\taddq\t%r11,%r14\n\tadcq\t%rdx,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t$0,%r12\n\tmovq\t%r15,56(%rdi)\n\n\n\tmulq\t%rsi\n\taddq\t%rax,%rbx\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t%rax\n\txorq\t%r13,%r13\n\taddq\t%rax,%r12\n\tmovq\t%rsi,%rax\n\taddq\t%rcx,%rcx\n\tadcq\t%rbx,%rbx\n\
tadcq\t$0,%r13\n\taddq\t%r12,%rcx\n\tadcq\t%rdx,%rbx\n\tmovq\t%rcx,64(%rdi)\n\tadcq\t$0,%r13\n\tmovq\t%rbx,72(%rdi)\n\n\n\tmulq\t%rax\n\taddq\t%r13,%rax\n\taddq\t%rbp,%rbp\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rax,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_sqr_mont_384\n.private_extern\t_sqr_mont_384\n\n.p2align\t5\n_sqr_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$120,%rsp\n.cfi_adjust_cfa_offset\t8*15\n\n\n\tmovq\t%rcx,96(%rsp)\n\tmovq\t%rdx,104(%rsp)\n\tmovq\t%rdi,112(%rsp)\n\n\tmovq\t%rsp,%rdi\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rsp),%rsi\n\tmovq\t96(%rsp),%rcx\n\tmovq\t104(%rsp),%rbx\n\tmovq\t112(%rsp),%rdi\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tleaq\t120(%rsp),%r8\n\tmovq\t120(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*21\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n\n.globl\t_redc_mont_384\n.private_extern\t_redc_mont_384\n\n.p2align\t5\n_redc_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$redc_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n\n\n.globl\t_from_mont_384\n.private_extern\t_from_mont_384\n\n.p2align\t5\n_from_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$from_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\n\n\n\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulq_by_1_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx
,%r14\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r9,%r15\n\timulq\t%rcx,%r9\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r10,%r8\n\timulq\t%rcx,%r10\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r11,%r9\n\timulq\t%rcx,%r11\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\
tadcq\t$0,%rdx\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11
,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rbx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__redq_tail_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_sgn0_pty_mont_384\n.private_extern\t_sgn0_pty_mont_384\n\n.p2align\t5\n_sgn0_pty_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sgn0_pty_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\txorq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sgn0_pty_mont_384x\n.private_extern\t_sgn0_pty_mont_384x\n\n.p2align\t5\n_sgn0_pty_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sgn0_pty_mont_384x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulq_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),
%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_mul_mont_384\n.private_extern\t_mul_mont_384\n\n.p2align\t5\n_mul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$mul_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$24,%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t0(%rdx),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\tmovq\t%rdx,%rbx\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\n\tcall\t__mulq_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-72\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulq_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rdi\n\tmulq\t%r14\n\tmovq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%rbp\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax
,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\txorq\t%r15,%r15\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t%rdx,%r14\n\tadcq\t$0,%r15\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r9,%rbp\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\txorq\t%r8,%r8\n\taddq\t%rax,%r14\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\
n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t%rdx,%r15\n\tadcq\t$0,%r8\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r10,%rbp\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\txorq\t%r9,%r9\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rd
x\n\taddq\t%rbp,%r15\n\tadcq\t%rdx,%r8\n\tadcq\t$0,%r9\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r11,%rbp\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\txorq\t%r10,%r10\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t%rdx,%r9\n\tadcq\t$0,%r10\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r12,%rbp\n\timu
lq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\txorq\t%r11,%r11\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%rbp\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t%rdx,%r10\n\tadcq\t$0,%r11\n\n\tmovq\t%rax,%rdi\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmovq\t%r13,%rbp\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rdi,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rsi)\n\taddq\t%r12,%r10\n\tadcq\t$0,%rdx\n\txorq\t%r12,%r12\n\taddq\t%rax,%r10\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,
%rbp\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%rbp\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t24(%rcx)\n\taddq\t%rbp,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%rbp\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%rbp,%r10\n\tadcq\t%rdx,%r11\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t16(%rsp),%rdi\n\tsubq\t0(%rcx),%r14\n\tmovq\t%r15,%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r8,%rbx\n\tsbbq\t16(%rcx),%r8\n\tmovq\t%r9,%rsi\n\tsbbq\t24(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t32(%rcx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t40(%rcx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rdx,%r15\n\tcmovcq\t%rbx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rsi,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%r13,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_sqr_n_mul_mont_384\n.private_extern\t_sqr_n_mul_mont_384\n\n.p2align\t5\n_sqr_n_mul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_n_mul_mont_384$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t8*17\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\nL$oop_sqr_384:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovd\t%xmm1,%edx\n\tleaq\t0(%rdi),%rsi\n\tdecl\t%edx\n\tjnz\tL$oop_sqr_384\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*23\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sqr_n_mul_mont_383\n.private_extern\t_sqr_n_mul_mont_383\n\n.p2align\t5\n_sqr_n_mul_mont_383:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef 
__BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_n_mul_mont_383$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t8*17\n\n\n\tmovq\t%r8,0(%rsp)\n\tmovq\t%rdi,8(%rsp)\n\tmovq\t%rcx,16(%rsp)\n\tleaq\t32(%rsp),%rdi\n\tmovq\t%r9,24(%rsp)\n\tmovq\t(%r9),%xmm2\n\nL$oop_sqr_383:\n\tmovd\t%edx,%xmm1\n\n\tcall\t__sqrq_384\n\n\tleaq\t0(%rdi),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t16(%rsp),%rbx\n\tcall\t__mulq_by_1_mont_384\n\n\tmovd\t%xmm1,%edx\n\taddq\t48(%rsi),%r14\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tleaq\t0(%rdi),%rsi\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tdecl\t%edx\n\tjnz\tL$oop_sqr_383\n\n.byte\t102,72,15,126,208\n\tmovq\t%rbx,%rcx\n\tmovq\t24(%rsp),%rbx\n\n\n\n\n\n\n\tmovq\t%r8,%r12\n\tmovq\t%r9,%r13\n\n\tcall\t__mulq_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t136(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-8*23\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulq_mont_383_nonred:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t%rax,%rbp\n\tmulq\t%r14\n\tmovq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tmovq\t%rdx,%r9\n\n\tmulq\t%r15\n\taddq\t%r
ax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t%r12\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r8,%r15\n\timulq\t8(%rsp),%r8\n\n\tmulq\t%r13\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r13\n\n\tmulq\t40(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r8,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rcx)\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r11\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r8,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t8(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t%rdx,%r14\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t40(%rsi)\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rcx)\n\tadd
q\t%rax,%r8\n\tmovq\t%r9,%rax\n\tadcq\t%rdx,%r8\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t24(%rcx)\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r12\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r9,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t16(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t%rdx,%r15\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r10\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r8,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t40(%rsi)\n\taddq\t%r8,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r8\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r9\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t24(%rcx)\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r13\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\ta
ddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t24(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t%rdx,%r8\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r11\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmovq\t%r11,%r10\n\timulq\t8(%rsp),%r11\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r9,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t40(%rsi)\n\taddq\t%r9,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r9\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rcx)\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r14\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t32(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t%rdx,%r9\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r12\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\
n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t40(%rsi)\n\taddq\t%r10,%r9\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r9\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rcx)\n\taddq\t%rax,%r11\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rcx)\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r15\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r8\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t40(%rbx),%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t%rdx,%r10\n\n\tmovq\t%rax,%rbp\n\tmulq\t0(%rsi)\n\taddq\t%rax,%r13\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t8(%rsi)\n\taddq\t%rax,%r14\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rsi)\n\taddq\t%rax,%r15\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmovq\t%r13,%r12\n\timulq\t8(%rsp),%r13\n\n\tmulq\t24(%rsi)\n\taddq\t%rax,%r8\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r8\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t32(%rsi)\n\taddq\t%rax,%r9\n\tmovq\t%rbp,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t40(%rsi)\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r10\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t0(%rcx)\n\tadd
q\t%rax,%r12\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r12\n\n\tmulq\t8(%rcx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t16(%rcx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t24(%rcx)\n\taddq\t%r12,%r8\n\tadcq\t$0,%rdx\n\taddq\t%rax,%r8\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t32(%rcx)\n\taddq\t%rax,%r9\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r9\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r12\n\n\tmulq\t40(%rcx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r12,%r10\n\tadcq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_sqr_mont_382x\n.private_extern\t_sqr_mont_382x\n\n.p2align\t5\n_sqr_mont_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n#ifdef __BLST_PORTABLE__\n\ttestl\t$1,___blst_platform_cap(%rip)\n\tjnz\tL$sqr_mont_382x$1\n#endif\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rsi,16(%rsp)\n\tmovq\t%rdi,24(%rsp)\n\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\
t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rax\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t24(%rsp),%rdi\n\tcall\t__mulq_mont_383_nonred\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\n\tmovq\t%r14,48(%rdi)\n\tmovq\t%r15,56(%rdi)\n\tmovq\t%r8,64(%rdi)\n\tmovq\t%r9,72(%rdi)\n\tmovq\t%r10,80(%rdi)\n\tmovq\t%r11,88(%rdi)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rax\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%r12\n\tmovq\t32+24(%rsp),%r13\n\n\tcall\t__mulq_mont_383_nonred\n\tmovq\t32+96(%rsp),%rsi\n\tmovq\t32+0(%rsp),%r12\n\tmovq\t32+8(%rsp),%r13\n\tandq\t%rsi,%r12\n\tmovq\t32+16(%rsp),%rax\n\tandq\t%rsi,%r13\n\tmovq\t32+24(%rsp),%rbx\n\tandq\t%rsi,%rax\n\tmovq\t32+32(%rsp),%rbp\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t32+40(%rsp),%rsi\n\n\tsubq\t%r12,%r14\n\tmovq\t0(%rcx),%r12\n\tsbbq\t%r13,%r15\n\tmovq\t8(%rcx),%r13\n\tsbbq\t%rax,%r8\n\tmovq\t16(%rcx),%rax\n\tsbbq\t%rbx,%r9\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t%rbp,%r10\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t%rsi,%r11\n\tsbbq\t%rsi,%rsi\n\n\tandq\t%rsi,%r12\n\tandq\t%rsi,%r13\n\tandq\t%rsi,%rax\n\tandq\t%rsi,%rbx\n\tandq\t%rsi,%rbp\n\tandq\t40(%rcx),%rsi\n\n\taddq\t%r12,%r14\n\tadcq\t%r13,%r15\n\tadcq\t%rax,%r8\n\tadcq\t%rbx,%r9\n\tadcq\t%rbp,%r10\n\tadcq\t%rsi,%r11\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r8,16(%rdi)\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tm
ovq\t%r11,40(%rdi)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n"
  },
  {
    "path": "build/mach-o/mulx_mont_256-x86_64.s",
    "content": ".text\t\n\n.globl\t_mulx_mont_sparse_256\n.private_extern\t_mulx_mont_sparse_256\n\n.p2align\t5\n_mulx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$mul_mont_sparse_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sqrx_mont_sparse_256\n.private_extern\t_sqrx_mont_sparse_256\n\n.p2align\t5\n_sqrx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_mont_sparse_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_off
set\t8\n\n\n\tmovq\t%rsi,%rbx\n\tmovq\t%rcx,%r8\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rbp\n\tmovq\t24(%rsi),%r9\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%rax,%r11\n\tcall\t__mulx_mont_sparse_256\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulx_mont_sparse_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmulxq\t%r15,%r15,%r12\n\tmulxq\t%rbp,%rbp,%r13\n\taddq\t%r15,%r11\n\tmulxq\t%r9,%r9,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rbp,%r12\n\tadcq\t%r9,%r13\n\tadcq\t$0,%r14\n\n\tmovq\t%rax,%r10\n\timulq\t%r8,%rax\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r11\n\tadcxq\t%r9,%r12\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r15,%r9\n\tadoxq\t%r9,%r15\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r12\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r9,%r13\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r10,%r14\n\tadoxq\t%r10,%r15\n\tadcxq\t%r10,%r15\n\tadoxq\t%r10,%r10\n\tadcq\t$0,%r10\n\tmovq\t%rax,%r11\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r12\n\tadcxq\t%r9,%r13\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%
r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r10,%r9\n\tadoxq\t%r9,%r10\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r11\n\tadoxq\t%r12,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r13\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\tadcxq\t%r11,%r15\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\tadcq\t$0,%r11\n\tmovq\t%rax,%r12\n\timulq\t%r8,%rax\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r13\n\tadcxq\t%r9,%r14\n\n\tmulxq\t8+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r9,%r15\n\n\tmulxq\t16+128(%rsi),%rbp,%r9\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r10\n\n\tmulxq\t24+128(%rsi),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadoxq\t%rbp,%r10\n\tadcxq\t%r11,%r9\n\tadoxq\t%r9,%r11\n\n\n\tmulxq\t0+128(%rcx),%rbp,%rax\n\tadcxq\t%rbp,%r12\n\tadoxq\t%r13,%rax\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r9,%r14\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%rax,%rdx\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\tadoxq\t%r12,%r12\n\tadcq\t$0,%r12\n\timulq\t%r8,%rdx\n\n\n\txorq\t%rbp,%rbp\n\tmulxq\t0+128(%rcx),%r13,%r9\n\tadcxq\t%rax,%r13\n\tadoxq\t%r9,%r14\n\n\tmulxq\t8+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r14\n\tadoxq\t%r9,%r15\n\n\tmulxq\t16+128(%rcx),%rbp,%r9\n\tadcxq\t%rbp,%r15\n\tadoxq\t%r9,%r10\n\n\tmulxq\t24+128(%rcx),%rbp,%r9\n\tmovq\t%r14,%rdx\n\tleaq\t128(%rcx),%rcx\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r9,%r11\n\tmovq\t%r15,%rax\n\tadcxq\t%r13,%r11\n\tadoxq\t%r13,%r12\n\tadcq\t$0,%r12\n\n\n\n\n\tmovq\t%r10,%rbp\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tsbbq\t16(%rcx),%r10\n\tmovq\t%r11,%r9\n\tsbbq\t24(%rcx),%r1
1\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rdx,%r14\n\tcmovcq\t%rax,%r15\n\tcmovcq\t%rbp,%r10\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%r9,%r11\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_fromx_mont_256\n.private_extern\t_fromx_mont_256\n\n.p2align\t5\n_fromx_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$from_mont_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\n\n\n\n\tmovq\t%r15,%rdx\n\tmovq\t%r10,%r12\n\tmovq\t%r11,%r13\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tsbbq\t24(%rbx),%r11\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_redcx_mont_256\n.private_extern\t_redcx_mont_256\n\n.p2align\t5\n_redcx_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$redc_mont_256$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq
\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n\tcall\t__mulx_by_1_mont_256\n\n\taddq\t32(%rsi),%r14\n\tadcq\t40(%rsi),%r15\n\tmovq\t%r14,%rax\n\tadcq\t48(%rsi),%r10\n\tmovq\t%r15,%rdx\n\tadcq\t56(%rsi),%r11\n\tsbbq\t%rsi,%rsi\n\n\n\n\n\tmovq\t%r10,%r12\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r10\n\tmovq\t%r11,%r13\n\tsbbq\t24(%rbx),%r11\n\tsbbq\t$0,%rsi\n\n\tcmovncq\t%r14,%rax\n\tcmovncq\t%r15,%rdx\n\tcmovncq\t%r10,%r12\n\tmovq\t%rax,0(%rdi)\n\tcmovncq\t%r11,%r13\n\tmovq\t%rdx,8(%rdi)\n\tmovq\t%r12,16(%rdi)\n\tmovq\t%r13,24(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulx_by_1_mont_256:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rax\n\tmovq\t8(%rsi),%r11\n\tmovq\t16(%rsi),%r12\n\tmovq\t24(%rsi),%r13\n\n\tmovq\t%rax,%r14\n\timulq\t%rcx,%rax\n\tmovq\t%rax,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r10,%rax\n\tadcq\t%rdx,%r14\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r11\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t16(%rbx)\n\tmovq\t%r11,%r15\n\timulq\t%rcx,%r11\n\taddq\t%rax,%r12\n\tmovq\t%r10,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%
r14\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r14,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r14\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r11,%rax\n\tadcq\t%rdx,%r15\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r12\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r12\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t16(%rbx)\n\tmovq\t%r12,%r10\n\timulq\t%rcx,%r12\n\taddq\t%rax,%r13\n\tmovq\t%r11,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r15,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r15\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r12,%rax\n\tadcq\t%rdx,%r10\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r13\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r13\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t16(%rbx)\n\tmovq\t%r13,%r11\n\timulq\t%rcx,%r13\n\taddq\t%rax,%r14\n\tmovq\t%r12,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r10,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r10\n\n\tmulq\t0(%rbx)\n\taddq\t%rax,%r11\n\tmovq\t%r13,%rax\n\tadcq\t%rdx,%r11\n\n\tmulq\t8(%rbx)\n\taddq\t%rax,%r14\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r14\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t16(%rbx)\n\taddq\t%rax,%r15\n\tmovq\t%r13,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r15\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\n\tmulq\t24(%rbx)\n\taddq\t%rax,%r10\n\tmovq\t%r14,%rax\n\tadcq\t$0,%rdx\n\taddq\t%r11,%r10\n\tadcq\t$0,%rdx\n\tmovq\t%rdx,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/mach-o/mulx_mont_384-x86_64.s",
    "content": ".text\t\n\n\n\n\n\n\n\n\n.p2align\t5\n__subx_mod_384x384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tmovq\t48(%rsi),%r14\n\n\tsubq\t0(%rdx),%r8\n\tmovq\t56(%rsi),%r15\n\tsbbq\t8(%rdx),%r9\n\tmovq\t64(%rsi),%rax\n\tsbbq\t16(%rdx),%r10\n\tmovq\t72(%rsi),%rbx\n\tsbbq\t24(%rdx),%r11\n\tmovq\t80(%rsi),%rbp\n\tsbbq\t32(%rdx),%r12\n\tmovq\t88(%rsi),%rsi\n\tsbbq\t40(%rdx),%r13\n\tmovq\t%r8,0(%rdi)\n\tsbbq\t48(%rdx),%r14\n\tmovq\t0(%rcx),%r8\n\tmovq\t%r9,8(%rdi)\n\tsbbq\t56(%rdx),%r15\n\tmovq\t8(%rcx),%r9\n\tmovq\t%r10,16(%rdi)\n\tsbbq\t64(%rdx),%rax\n\tmovq\t16(%rcx),%r10\n\tmovq\t%r11,24(%rdi)\n\tsbbq\t72(%rdx),%rbx\n\tmovq\t24(%rcx),%r11\n\tmovq\t%r12,32(%rdi)\n\tsbbq\t80(%rdx),%rbp\n\tmovq\t32(%rcx),%r12\n\tmovq\t%r13,40(%rdi)\n\tsbbq\t88(%rdx),%rsi\n\tmovq\t40(%rcx),%r13\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r8\n\tandq\t%rdx,%r9\n\tandq\t%rdx,%r10\n\tandq\t%rdx,%r11\n\tandq\t%rdx,%r12\n\tandq\t%rdx,%r13\n\n\taddq\t%r8,%r14\n\tadcq\t%r9,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%r10,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%r11,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%r12,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%r13,%rsi\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rsi,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__addx_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t0(%rdx),%r8\n\tadcq\t8(%rdx),%r9\n\tadcq\t16(%rdx),%r10\n\tmovq\t%r8,%r14\n\tadcq\t24(%rdx),%r11\n\tmovq\t%r9,%r15\n\tadcq\t32(%rdx),%r12\n\tmovq\t%r10,%rax\n\tadcq\t40(%rdx),%r13\n\tmovq\t%r11,%rbx\n\tsbbq\t%rdx,%rdx\n\n\tsubq\t0(%rcx),%r8\n\tsbbq\t8(%rcx),%r9\n\tmovq
\t%r12,%rbp\n\tsbbq\t16(%rcx),%r10\n\tsbbq\t24(%rcx),%r11\n\tsbbq\t32(%rcx),%r12\n\tmovq\t%r13,%rsi\n\tsbbq\t40(%rcx),%r13\n\tsbbq\t$0,%rdx\n\n\tcmovcq\t%r14,%r8\n\tcmovcq\t%r15,%r9\n\tcmovcq\t%rax,%r10\n\tmovq\t%r8,0(%rdi)\n\tcmovcq\t%rbx,%r11\n\tmovq\t%r9,8(%rdi)\n\tcmovcq\t%rbp,%r12\n\tmovq\t%r10,16(%rdi)\n\tcmovcq\t%rsi,%r13\n\tmovq\t%r11,24(%rdi)\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__subx_mod_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n__subx_mod_384_a_is_loaded:\n\tsubq\t0(%rdx),%r8\n\tmovq\t0(%rcx),%r14\n\tsbbq\t8(%rdx),%r9\n\tmovq\t8(%rcx),%r15\n\tsbbq\t16(%rdx),%r10\n\tmovq\t16(%rcx),%rax\n\tsbbq\t24(%rdx),%r11\n\tmovq\t24(%rcx),%rbx\n\tsbbq\t32(%rdx),%r12\n\tmovq\t32(%rcx),%rbp\n\tsbbq\t40(%rdx),%r13\n\tmovq\t40(%rcx),%rsi\n\tsbbq\t%rdx,%rdx\n\n\tandq\t%rdx,%r14\n\tandq\t%rdx,%r15\n\tandq\t%rdx,%rax\n\tandq\t%rdx,%rbx\n\tandq\t%rdx,%rbp\n\tandq\t%rdx,%rsi\n\n\taddq\t%r14,%r8\n\tadcq\t%r15,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%rax,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%rbx,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%rbp,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%rsi,%r13\n\tmovq\t%r12,32(%rdi)\n\tmovq\t%r13,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_mulx_mont_384x\n.private_extern\t_mulx_mont_384x\n\n.p2align\t5\n_mulx_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$mul_mont_384x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjus
t_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$328,%rsp\n.cfi_adjust_cfa_offset\t328\n\n\n\tmovq\t%rdx,%rbx\n\tmovq\t%rdi,32(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\tmovq\t%rdx,16(%rsp)\n\tmovq\t%rcx,8(%rsp)\n\tmovq\t%r8,0(%rsp)\n\n\n\n\n\tleaq\t40(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48(%rbx),%rbx\n\tleaq\t128+48(%rsi),%rsi\n\tleaq\t96(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t8(%rsp),%rcx\n\tleaq\t(%rbx),%rsi\n\tleaq\t-48(%rbx),%rdx\n\tleaq\t40+192+48(%rsp),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t-48(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tleaq\t(%rdi),%rbx\n\tleaq\t48(%rdi),%rsi\n\tcall\t__mulx_384\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t40(%rsp),%rdx\n\tmovq\t8(%rsp),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t40(%rsp),%rsi\n\tleaq\t40+96(%rsp),%rdx\n\tleaq\t40(%rsp),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t(%rcx),%rbx\n\n\n\tleaq\t40(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tmovq\t32(%rsp),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\n\tleaq\t40+192(%rsp),%rsi\n\tmovq\t0(%rsp),%rcx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tleaq\t328(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-328-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\
n.cfi_endproc\t\n\n.globl\t_sqrx_mont_384x\n.private_extern\t_sqrx_mont_384x\n\n.p2align\t5\n_sqrx_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_mont_384x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__addx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rdx\n\tleaq\t32+48(%rsp),%rdi\n\tcall\t__subx_mod_384\n\n\n\tmovq\t24(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tmovq\t%rdx,%r8\n\tadcq\t%r12,%r12\n\tmovq\t%r15,%r9\n\tadcq\t%rdi,%rdi\n\tmovq\t%rax,%r10\n\tadcq\t%rbp,%rbp\n\tmovq\t%r12,%r11\n\tsbbq\t%rsi,%rsi\n\n\tsubq\t0(%rcx),%rdx\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%rdi,%r13\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r12\n\tsbbq\t32(%rcx),%rdi\n\tmovq\t%rbp,%r14\n\tsbbq\t40(%rcx),%rbp\n\tsbbq\t$0,%rsi\n\n\tcmovcq\t%r8,%rdx\n\tcmovcq\t%r9,%r15\n\tcmovcq\t%r10,%rax\n\tmovq\t%rdx,48(%rbx)\n\tcmovcq\t%r11,%r12\n\tmovq\t%r15,56(%rbx)\n\tcmovcq\t%r13,%rdi\n\tmovq\t%rax,64(%rbx)\n\tcmovcq\t%r14,%rbp\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+
0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rsp),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_mulx_382x\n.private_extern\t_mulx_382x\n\n.p2align\t5\n_mulx_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$mul_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tleaq\t96(%rdi),%rdi\n\tmovq\t%rsi,0(%rsp)\n\tmovq\t%rdx,8(%rsp)\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rcx,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\taddq\t48(%rsi),%r8\n\tadcq\t56(%rsi),%r9\n\tadcq\t64(%rsi),%r10\n\tadcq\t72(%rsi),%r11\n\tadcq\t80(%rsi),%r12\n\tadcq\t88(%rsi),%r13\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\n\tmovq\t0(%rdx),%r8\n\tmovq\t8(%rdx),%r9\n\tmovq\t16(%rdx),%r10\n\tmovq\t24(%rdx),%r11\n\tmovq\t32(%rdx),%r1
2\n\tmovq\t40(%rdx),%r13\n\n\taddq\t48(%rdx),%r8\n\tadcq\t56(%rdx),%r9\n\tadcq\t64(%rdx),%r10\n\tadcq\t72(%rdx),%r11\n\tadcq\t80(%rdx),%r12\n\tadcq\t88(%rdx),%r13\n\n\tmovq\t%r8,32+48(%rsp)\n\tmovq\t%r9,32+56(%rsp)\n\tmovq\t%r10,32+64(%rsp)\n\tmovq\t%r11,32+72(%rsp)\n\tmovq\t%r12,32+80(%rsp)\n\tmovq\t%r13,32+88(%rsp)\n\n\n\tleaq\t32+0(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\tcall\t__mulx_384\n\n\n\tmovq\t0(%rsp),%rsi\n\tmovq\t8(%rsp),%rbx\n\tleaq\t-96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\n\tleaq\t48+128(%rsi),%rsi\n\tleaq\t48(%rbx),%rbx\n\tleaq\t32(%rsp),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t16(%rsp),%rsi\n\tleaq\t32(%rsp),%rdx\n\tmovq\t24(%rsp),%rcx\n\tmovq\t%rsi,%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t0(%rdi),%rsi\n\tleaq\t-96(%rdi),%rdx\n\tcall\t__subx_mod_384x384\n\n\n\tleaq\t-96(%rdi),%rsi\n\tleaq\t32(%rsp),%rdx\n\tleaq\t-96(%rdi),%rdi\n\tcall\t__subx_mod_384x384\n\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sqrx_382x\n.private_extern\t_sqrx_382x\n\n.p2align\t5\n_sqrx_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rsi\n.cf
i_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rcx\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%rbx\n\tmovq\t32(%rsi),%rbp\n\tmovq\t40(%rsi),%rdx\n\n\tmovq\t%r14,%r8\n\taddq\t48(%rsi),%r14\n\tmovq\t%r15,%r9\n\tadcq\t56(%rsi),%r15\n\tmovq\t%rax,%r10\n\tadcq\t64(%rsi),%rax\n\tmovq\t%rbx,%r11\n\tadcq\t72(%rsi),%rbx\n\tmovq\t%rbp,%r12\n\tadcq\t80(%rsi),%rbp\n\tmovq\t%rdx,%r13\n\tadcq\t88(%rsi),%rdx\n\n\tmovq\t%r14,0(%rdi)\n\tmovq\t%r15,8(%rdi)\n\tmovq\t%rax,16(%rdi)\n\tmovq\t%rbx,24(%rdi)\n\tmovq\t%rbp,32(%rdi)\n\tmovq\t%rdx,40(%rdi)\n\n\n\tleaq\t48(%rsi),%rdx\n\tleaq\t48(%rdi),%rdi\n\tcall\t__subx_mod_384_a_is_loaded\n\n\n\tleaq\t(%rdi),%rsi\n\tleaq\t-48(%rdi),%rbx\n\tleaq\t-48(%rdi),%rdi\n\tcall\t__mulx_384\n\n\n\tmovq\t(%rsp),%rsi\n\tleaq\t48(%rsi),%rbx\n\tleaq\t96(%rdi),%rdi\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rdi),%r8\n\tmovq\t8(%rdi),%r9\n\tmovq\t16(%rdi),%r10\n\tmovq\t24(%rdi),%r11\n\tmovq\t32(%rdi),%r12\n\tmovq\t40(%rdi),%r13\n\tmovq\t48(%rdi),%r14\n\tmovq\t56(%rdi),%r15\n\tmovq\t64(%rdi),%rax\n\tmovq\t72(%rdi),%rbx\n\tmovq\t80(%rdi),%rbp\n\taddq\t%r8,%r8\n\tmovq\t88(%rdi),%rdx\n\tadcq\t%r9,%r9\n\tmovq\t%r8,0(%rdi)\n\tadcq\t%r10,%r10\n\tmovq\t%r9,8(%rdi)\n\tadcq\t%r11,%r11\n\tmovq\t%r10,16(%rdi)\n\tadcq\t%r12,%r12\n\tmovq\t%r11,24(%rdi)\n\tadcq\t%r13,%r13\n\tmovq\t%r12,32(%rdi)\n\tadcq\t%r14,%r14\n\tmovq\t%r13,40(%rdi)\n\tadcq\t%r15,%r15\n\tmovq\t%r14,48(%rdi)\n\tadcq\t%rax,%rax\n\tmovq\t%r15,56(%rdi)\n\tadcq\t%rbx,%rbx\n\tmovq\t%rax,64(%rdi)\n\tadcq\t%rbp,%rbp\n\tmovq\t%rbx,72(%rdi)\n\tadcq\t%rdx,%rdx\n\tmovq\t%rbp,80(%rdi)\n\tmovq\t%rdx,88(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cf
i_adjust_cfa_offset\t-8*7\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_mulx_384\n.private_extern\t_mulx_384\n\n.p2align\t5\n_mulx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$mul_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmovq\t0(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-48\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n.p2align\t5\n__mulx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rbx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%r14,%r9,%rcx\n\txorq\t%rbp,%rbp\n\n\tmulxq\t%r15,%r8,%rax\n\tadcxq\t%rcx,%r8\n\tmovq\t%r9,0(%rdi)\n\n\tmulxq\t%r10,%r9,%rcx\n\tadcxq\t%rax,%r9\n\n\tmulxq\t%r11,%r10,%rax\n\tadcxq\t%rcx,%r10\n\n\tmulxq\t%r12,%r11,%rcx\n\tadcxq\t%rax,%r11\n\n\tmulxq\t%r13,%r12,%r13\n\tmovq\t8(%rbx),%rdx\n\tadcxq\t%rcx,%r12\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,8(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi
),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,16(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,24(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,32(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tadoxq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmulxq\t%r14,%rax,%rcx\n\tadcxq\t%r8,%rax\n\tadoxq\t%rcx,%r9\n\tmovq\t%rax,40(%rdi)\n\n\tmulxq\t%r15,%r8,%rcx\n\tadcxq\t%r9,%r8\n\tadoxq\t%rcx,%r10\n\n\tmulxq\t128+16(%rsi),%r9,%rax\n\tadcxq\t%r10,%r9\n\tado
xq\t%rax,%r11\n\n\tmulxq\t128+24(%rsi),%r10,%rcx\n\tadcxq\t%r11,%r10\n\tadoxq\t%rcx,%r12\n\n\tmulxq\t128+32(%rsi),%r11,%rax\n\tadcxq\t%r12,%r11\n\tadoxq\t%r13,%rax\n\n\tmulxq\t128+40(%rsi),%r12,%r13\n\tmovq\t%rax,%rdx\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\tadcxq\t%rbp,%r13\n\tmovq\t%r8,48(%rdi)\n\tmovq\t%r9,56(%rdi)\n\tmovq\t%r10,64(%rdi)\n\tmovq\t%r11,72(%rdi)\n\tmovq\t%r12,80(%rdi)\n\tmovq\t%r13,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n.globl\t_sqrx_384\n.private_extern\t_sqrx_384\n\n.p2align\t5\n_sqrx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tpushq\t%rdi\n.cfi_adjust_cfa_offset\t8\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__sqrx_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__sqrx_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r14\n\tmovq\t16(%rsi),%r15\n\tmovq\t24(%rsi),%rcx\n\tmovq\t32(%rsi),%rbx\n\n\n\tmulxq\t%r14,%r8,%rdi\n\tmovq\t40(%rsi),%rbp\n\tmulxq\t%r15,%r9,%rax\n\taddq\t%rdi,%r9\n\tmulxq\t%rcx,%r10,%rdi\n\tadcq\t%rax,%r10\n\tmulxq\t%rbx,%r11,%rax\n\tadcq\t%r
di,%r11\n\tmulxq\t%rbp,%r12,%r13\n\tmovq\t%r14,%rdx\n\tadcq\t%rax,%r12\n\tadcq\t$0,%r13\n\n\n\txorq\t%r14,%r14\n\tmulxq\t%r15,%rdi,%rax\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rax,%r11\n\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rax,%r12\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%r15,%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%r14,%rax\n\tadcxq\t%rax,%r14\n\n\n\txorq\t%r15,%r15\n\tmulxq\t%rcx,%rdi,%rax\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rax,%r13\n\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rax,%r14\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%r15,%rax\n\tadcxq\t%rax,%r15\n\n\n\txorq\t%rcx,%rcx\n\tmulxq\t%rbx,%rdi,%rax\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rax,%r15\n\n\tmulxq\t%rbp,%rdi,%rax\n\tmovq\t%rbx,%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rcx,%rax\n\tadcxq\t%rax,%rcx\n\n\n\tmulxq\t%rbp,%rdi,%rbx\n\tmovq\t0(%rsi),%rdx\n\taddq\t%rdi,%rcx\n\tmovq\t8(%rsp),%rdi\n\tadcq\t$0,%rbx\n\n\n\txorq\t%rbp,%rbp\n\tadcxq\t%r8,%r8\n\tadcxq\t%r9,%r9\n\tadcxq\t%r10,%r10\n\tadcxq\t%r11,%r11\n\tadcxq\t%r12,%r12\n\n\n\tmulxq\t%rdx,%rdx,%rax\n\tmovq\t%rdx,0(%rdi)\n\tmovq\t8(%rsi),%rdx\n\tadoxq\t%rax,%r8\n\tmovq\t%r8,8(%rdi)\n\n\tmulxq\t%rdx,%r8,%rax\n\tmovq\t16(%rsi),%rdx\n\tadoxq\t%r8,%r9\n\tadoxq\t%rax,%r10\n\tmovq\t%r9,16(%rdi)\n\tmovq\t%r10,24(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t24(%rsi),%rdx\n\tadoxq\t%r8,%r11\n\tadoxq\t%r9,%r12\n\tadcxq\t%r13,%r13\n\tadcxq\t%r14,%r14\n\tmovq\t%r11,32(%rdi)\n\tmovq\t%r12,40(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t32(%rsi),%rdx\n\tadoxq\t%r8,%r13\n\tadoxq\t%r9,%r14\n\tadcxq\t%r15,%r15\n\tadcxq\t%rcx,%rcx\n\tmovq\t%r13,48(%rdi)\n\tmovq\t%r14,56(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tmovq\t40(%rsi),%rdx\n\tadoxq\t%r8,%r15\n\tadoxq\t%r9,%rcx\n\tadcxq\t%rbx,%rbx\n\tadcxq\t%rbp,%rbp\n\tmovq\t%r15,64(%rdi)\n\tmovq\t%rcx,72(%rdi)\n\n\tmulxq\t%rdx,%r8,%r9\n\tadoxq\t%r8,%rbx\n\tadoxq\t%r9,%rbp\n\n\tmovq\t%rbx,80(%rdi)\n\tmovq\t%rbp,88(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARD
ENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n\n.globl\t_redcx_mont_384\n.private_extern\t_redcx_mont_384\n\n.p2align\t5\n_redcx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$redc_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n\n\n\n.globl\t_fromx_mont_384\n.private_extern\t_fromx_mont_384\n\n.p2align\t5\n_fromx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$from_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\n\n\
n\tmovq\t%r14,%rax\n\tmovq\t%r15,%rcx\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulx_by_1_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t%rcx,%rdx\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\timulq\t%r8,%rdx\n\n\n\txorq\t%r14,%r14\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r13\n\tadoxq\t%r14,%rbp\n\tadcxq\t%rbp,%r14\n\timulq\t%r9,%rdx\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tad
oxq\t%rbp,%r13\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r14\n\tadoxq\t%r15,%rbp\n\tadcxq\t%rbp,%r15\n\timulq\t%r10,%rdx\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r15\n\tadoxq\t%r8,%rbp\n\tadcxq\t%rbp,%r8\n\timulq\t%r11,%rdx\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r8\n\tadoxq\t%r9,%rbp\n\tadcxq\t%rbp,%r9\n\timulq\t%r12,%rdx\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r9\n\tadoxq\t%r10,%rbp\n\tadcxq\t%rbp,%r10\n\timulq\t%r13,%rdx\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r15\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t24(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r8\n\tad
oxq\t%rbp,%r9\n\n\tmulxq\t32(%rbx),%rax,%rbp\n\tadcxq\t%rax,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t40(%rbx),%rax,%rbp\n\tmovq\t%rcx,%rdx\n\tadcxq\t%rax,%r10\n\tadoxq\t%r11,%rbp\n\tadcxq\t%rbp,%r11\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n\n.p2align\t5\n__redx_tail_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\taddq\t48(%rsi),%r14\n\tmovq\t%r14,%rax\n\tadcq\t56(%rsi),%r15\n\tadcq\t64(%rsi),%r8\n\tadcq\t72(%rsi),%r9\n\tmovq\t%r15,%rcx\n\tadcq\t80(%rsi),%r10\n\tadcq\t88(%rsi),%r11\n\tsbbq\t%r12,%r12\n\n\n\n\n\tmovq\t%r8,%rdx\n\tmovq\t%r9,%rbp\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tmovq\t%r10,%r13\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tmovq\t%r11,%rsi\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%r12\n\n\tcmovcq\t%rax,%r14\n\tcmovcq\t%rcx,%r15\n\tcmovcq\t%rdx,%r8\n\tmovq\t%r14,0(%rdi)\n\tcmovcq\t%rbp,%r9\n\tmovq\t%r15,8(%rdi)\n\tcmovcq\t%r13,%r10\n\tmovq\t%r8,16(%rdi)\n\tcmovcq\t%rsi,%r11\n\tmovq\t%r9,24(%rdi)\n\tmovq\t%r10,32(%rdi)\n\tmovq\t%r11,40(%rdi)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_sgn0x_pty_mont_384\n.private_extern\t_sgn0x_pty_mont_384\n\n.p2align\t5\n_sgn0x_pty_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sgn0_pty_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t0(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\txo
rq\t%rax,%rax\n\tmovq\t%r14,%r13\n\taddq\t%r14,%r14\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r14\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tnotq\t%rax\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sgn0x_pty_mont_384x\n.private_extern\t_sgn0x_pty_mont_384x\n\n.p2align\t5\n_sgn0x_pty_mont_384x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sgn0_pty_mont_384x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$8,%rsp\n.cfi_adjust_cfa_offset\t8\n\n\n\tmovq\t%rsi,%rbx\n\tleaq\t48(%rdi),%rsi\n\tmovq\t%rdx,%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\tleaq\t0(%rdi),%rsi\n\txorq\t%rdi,%rdi\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rdi\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10
\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rdi\n\n\tmovq\t%r14,0(%rsp)\n\tnotq\t%rdi\n\tandq\t$1,%r13\n\tandq\t$2,%rdi\n\torq\t%r13,%rdi\n\n\tcall\t__mulx_by_1_mont_384\n\n\tmovq\t%r14,%r12\n\torq\t%r15,%r14\n\torq\t%r8,%r14\n\torq\t%r9,%r14\n\torq\t%r10,%r14\n\torq\t%r11,%r14\n\n\txorq\t%rax,%rax\n\tmovq\t%r12,%r13\n\taddq\t%r12,%r12\n\tadcq\t%r15,%r15\n\tadcq\t%r8,%r8\n\tadcq\t%r9,%r9\n\tadcq\t%r10,%r10\n\tadcq\t%r11,%r11\n\tadcq\t$0,%rax\n\n\tsubq\t0(%rbx),%r12\n\tsbbq\t8(%rbx),%r15\n\tsbbq\t16(%rbx),%r8\n\tsbbq\t24(%rbx),%r9\n\tsbbq\t32(%rbx),%r10\n\tsbbq\t40(%rbx),%r11\n\tsbbq\t$0,%rax\n\n\tmovq\t0(%rsp),%r12\n\n\tnotq\t%rax\n\n\ttestq\t%r14,%r14\n\tcmovzq\t%rdi,%r13\n\n\ttestq\t%r12,%r12\n\tcmovnzq\t%rdi,%rax\n\n\tandq\t$1,%r13\n\tandq\t$2,%rax\n\torq\t%r13,%rax\n\n\tmovq\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_mulx_mont_384\n.private_extern\t_mulx_mont_384\n\n.p2align\t5\n_mulx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$mul_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t%rdx,%rbx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rdx),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),
%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\tmovq\t%r8,(%rsp)\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\txorq\t%r15,%r15\n\n\tmovq\t%r8,16(%rsp)\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\tadoxq\t%rax,%r15\n\tadoxq\t%rax,%rax\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%
rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%r8,%r14\n\tadoxq\t%r8,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r9,16(%rsp)\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rdx\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\tadoxq\t%r8,%rax\n\tadoxq\t%r8,%r8\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r9,%r15\n\tadoxq\t%r9,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r10,16(%rsp)\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\tadoxq\t%r9,%r8\n\tadoxq\t%r9,%r9\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%
rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r10,%rax\n\tadoxq\t%r10,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r11,16(%rsp)\n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\tadoxq\t%r10,%r9\n\tadoxq\t%r10,%r10\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r11,%r8\n\tadoxq\t%r11,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\tmovq\t%r12,16(%rsp)\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\
n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\tadoxq\t%r11,%r10\n\tadoxq\t%r11,%r11\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t16(%rsp),%rdi\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r12,%r9\n\tadoxq\t%r12,%r10\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tadcxq\t%r12,%r11\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tmovq\t%r15,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tmovq\t%rax,%rsi\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tmovq\t%r14,%rdx\n\tadcxq\t%r12,%r10\n\tadoxq\t%r12,%r11\n\tleaq\t128(%rcx),%rcx\n\tmovq\t%r8,%r12\n\tadcq\t$0,%r11\n\n\n\n\n\tsubq\t0(%rcx),%r14\n\tsbbq\t8(%rcx),%r15\n\tmovq\t%r9,%rdi\n\tsbbq\t16(%rcx),%rax\n\tsbbq\t24(%rcx),%r8\n\tsbbq\t32(%rcx),%r9\n\tmovq\t%r10,%rbp\n\tsbbq\t40(%rcx),%r10\n\tsbbq\t$0,%r11\n\n\tcmovncq\t%r14,%rdx\n\tcmovcq\t%r13,%r15\n\tcmovcq\t%rsi,%rax\n\tcmovncq\t%r8,%r12\n\tmovq\t%rdx,0(%rbx)\n\tcmovncq\t%r9,%rdi\n\tmovq\t%r15,8(%rbx)\n\tcmovncq\t%r10,%rbp\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n
\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sqrx_mont_384\n.private_extern\t_sqrx_mont_384\n\n.p2align\t5\n_sqrx_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*3\n\n\n\tmovq\t%rcx,%r8\n\tleaq\t-128(%rdx),%rcx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tleaq\t(%rsi),%rbx\n\tmovq\t%r8,(%rsp)\n\tleaq\t-128(%rsi),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sqrx_n_mul_mont_384\n.private_extern\t_sqrx_n_mul_mont_384\n\n.p2align\t5\n_sqrx_n_mul_mont_384:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_n_mul_mont_384$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\
t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-40(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*5\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\nL$oop_sqrx_384:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\tL$oop_sqrx_384\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t40(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t48(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t64(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t72(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t80(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t88(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.globl\t_sqrx_n_mul_mont_383\n.private_extern\t_sqrx_n_mul_mont_383\n\n.p2align\t5\n_sqrx_n_mul_mont_383:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_n_mul_mont_383$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tleaq\t-40(%rsp),%rsp\n.cfi_adjust_cfa_offset\t8*5\n\n\n\tmovq\t%rdx,%r10\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%rdx\n\tmovq\t8(%rsi),%r15\n\tmovq\t16
(%rsi),%rax\n\tmovq\t%rsi,%rbx\n\tmovq\t24(%rsi),%r12\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\n\tmovq\t%r8,(%rsp)\n\tmovq\t%r9,24(%rsp)\n\tmovq\t0(%r9),%xmm2\n\tleaq\t-128(%rcx),%rcx\n\nL$oop_sqrx_383:\n\tmovd\t%r10d,%xmm1\n\tleaq\t-128(%rbx),%rsi\n\n\tmulxq\t%rdx,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\n\tmovd\t%xmm1,%r10d\n\tdecl\t%r10d\n\tjnz\tL$oop_sqrx_383\n\n\tmovq\t%rdx,%r14\n.byte\t102,72,15,126,210\n\tleaq\t-128(%rbx),%rsi\n\tmovq\t24(%rsp),%rbx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_384\n\n\tmovq\t40(%rsp),%r15\n.cfi_restore\t%r15\n\tmovq\t48(%rsp),%r14\n.cfi_restore\t%r14\n\tmovq\t56(%rsp),%r13\n.cfi_restore\t%r13\n\tmovq\t64(%rsp),%r12\n.cfi_restore\t%r12\n\tmovq\t72(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmovq\t80(%rsp),%rbp\n.cfi_restore\t%rbp\n\tleaq\t88(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n.p2align\t5\n__mulx_mont_383_nonred:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tmulxq\t%r15,%r14,%r10\n\tmulxq\t%rax,%r15,%r11\n\taddq\t%r14,%r9\n\tmulxq\t%r12,%rax,%r12\n\tadcq\t%r15,%r10\n\tmulxq\t%rdi,%rdi,%r13\n\tadcq\t%rax,%r11\n\tmulxq\t%rbp,%rbp,%r14\n\tmovq\t8(%rbx),%rdx\n\tadcq\t%rdi,%r12\n\tadcq\t%rbp,%r13\n\tadcq\t$0,%r14\n\tmovq\t%r8,%rax\n\timulq\t8(%rsp),%r8\n\n\n\txorq\t%r15,%r15\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r9\n\tadcxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r8,%rdx\n\tadoxq\t%rdi,%r14\n\tadcxq\t%r15,%rbp\n\tadoxq\t%rbp,%r15\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t8+128(%rcx
),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t16(%rbx),%rdx\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\tadcxq\t%rax,%r14\n\tadoxq\t%rax,%r15\n\tadcxq\t%rax,%r15\n\tmovq\t%r9,%r8\n\timulq\t8(%rsp),%r9\n\n\n\txorq\t%rax,%rax\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r10\n\tadcxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r9,%rdx\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rax,%rbp\n\tadoxq\t%rbp,%rax\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r10\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t24(%rbx),%rdx\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\tadcxq\t%r8,%r15\n\tadoxq\t%r8,%rax\n\tadcxq\t%r8,%rax\n\tmovq\t%r10,%r9\n\timulq\t8(%rsp),%r10\n\n\n\txorq\t%r8,%r8\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r11\n\tadcxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmov
q\t%r10,%rdx\n\tadoxq\t%rdi,%rax\n\tadcxq\t%r8,%rbp\n\tadoxq\t%rbp,%r8\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r11\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t32(%rbx),%rdx\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\tadcxq\t%r9,%rax\n\tadoxq\t%r9,%r8\n\tadcxq\t%r9,%r8\n\tmovq\t%r11,%r10\n\timulq\t8(%rsp),%r11\n\n\n\txorq\t%r9,%r9\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r12\n\tadcxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r11,%rdx\n\tadoxq\t%rdi,%r8\n\tadcxq\t%r9,%rbp\n\tadoxq\t%rbp,%r9\n\n\n\txorq\t%r11,%r11\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r10\n\tadoxq\t%rbp,%r12\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r12\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t40(%rbx),%rdx\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\tadcxq\t%r10,%r8\n\tadoxq\t%r10,%r9\n\tadcxq\t%r10,%r9\n\tmovq\t%r12,%r11\n\timulq\t8(%rsp),%r12\n\n\n\txorq\t%r10,%r10\n\tmulxq\t0+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r13\n\tadcxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r14\n\tadcxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r15\n\tadcxq\t%rbp,%rax\n\n\t
mulxq\t24+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%rax\n\tadcxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rsi),%rdi,%rbp\n\tadoxq\t%rdi,%r8\n\tadcxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rsi),%rdi,%rbp\n\tmovq\t%r12,%rdx\n\tadoxq\t%rdi,%r9\n\tadcxq\t%r10,%rbp\n\tadoxq\t%rbp,%r10\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r11\n\tadoxq\t%rbp,%r13\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r13,%rdx\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\tadcxq\t%r11,%r9\n\tadoxq\t%r11,%r10\n\tadcxq\t%r11,%r10\n\timulq\t8(%rsp),%rdx\n\tmovq\t24(%rsp),%rbx\n\n\n\txorq\t%r12,%r12\n\tmulxq\t0+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r13\n\tadoxq\t%rbp,%r14\n\n\tmulxq\t8+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r14\n\tadoxq\t%rbp,%r15\n\n\tmulxq\t16+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r15\n\tadoxq\t%rbp,%rax\n\n\tmulxq\t24+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%rax\n\tadoxq\t%rbp,%r8\n\n\tmulxq\t32+128(%rcx),%rdi,%rbp\n\tadcxq\t%rdi,%r8\n\tadoxq\t%rbp,%r9\n\n\tmulxq\t40+128(%rcx),%rdi,%rbp\n\tmovq\t%r14,%rdx\n\tadcxq\t%rdi,%r9\n\tadoxq\t%rbp,%r10\n\tadcq\t$0,%r10\n\tmovq\t%r8,%r12\n\n\tmovq\t%r14,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r9,%rdi\n\tmovq\t%r8,24(%rbx)\n\tmovq\t%r9,32(%rbx)\n\tmovq\t%r10,40(%rbx)\n\tmovq\t%r10,%rbp\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rsi\n\tlfence\n\tjmpq\t*%rsi\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_sqrx_mont_382x\n.private_extern\t_sqrx_mont_382x\n\n.p2align\t5\n_sqrx_mont_382x:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\nL$sqr_mont_382x$1:\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tpushq\t%rbx\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_adjust_cfa_off
set\t8\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%r15,-56\n\tsubq\t$136,%rsp\n.cfi_adjust_cfa_offset\t136\n\n\n\tmovq\t%rcx,0(%rsp)\n\tmovq\t%rdx,%rcx\n\tmovq\t%rdi,16(%rsp)\n\tmovq\t%rsi,24(%rsp)\n\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t32(%rsi),%r12\n\tmovq\t40(%rsi),%r13\n\n\tmovq\t%r8,%r14\n\taddq\t48(%rsi),%r8\n\tmovq\t%r9,%r15\n\tadcq\t56(%rsi),%r9\n\tmovq\t%r10,%rax\n\tadcq\t64(%rsi),%r10\n\tmovq\t%r11,%rdx\n\tadcq\t72(%rsi),%r11\n\tmovq\t%r12,%rbx\n\tadcq\t80(%rsi),%r12\n\tmovq\t%r13,%rbp\n\tadcq\t88(%rsi),%r13\n\n\tsubq\t48(%rsi),%r14\n\tsbbq\t56(%rsi),%r15\n\tsbbq\t64(%rsi),%rax\n\tsbbq\t72(%rsi),%rdx\n\tsbbq\t80(%rsi),%rbx\n\tsbbq\t88(%rsi),%rbp\n\tsbbq\t%rdi,%rdi\n\n\tmovq\t%r8,32+0(%rsp)\n\tmovq\t%r9,32+8(%rsp)\n\tmovq\t%r10,32+16(%rsp)\n\tmovq\t%r11,32+24(%rsp)\n\tmovq\t%r12,32+32(%rsp)\n\tmovq\t%r13,32+40(%rsp)\n\n\tmovq\t%r14,32+48(%rsp)\n\tmovq\t%r15,32+56(%rsp)\n\tmovq\t%rax,32+64(%rsp)\n\tmovq\t%rdx,32+72(%rsp)\n\tmovq\t%rbx,32+80(%rsp)\n\tmovq\t%rbp,32+88(%rsp)\n\tmovq\t%rdi,32+96(%rsp)\n\n\n\n\tleaq\t48(%rsi),%rbx\n\n\tmovq\t48(%rsi),%rdx\n\tmovq\t0(%rsi),%r14\n\tmovq\t8(%rsi),%r15\n\tmovq\t16(%rsi),%rax\n\tmovq\t24(%rsi),%r12\n\tmovq\t32(%rsi),%rdi\n\tmovq\t40(%rsi),%rbp\n\tleaq\t-128(%rsi),%rsi\n\tleaq\t-128(%rcx),%rcx\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\taddq\t%rdx,%rdx\n\tadcq\t%r15,%r15\n\tadcq\t%rax,%rax\n\tadcq\t%r12,%r12\n\tadcq\t%rdi,%rdi\n\tadcq\t%rbp,%rbp\n\n\tmovq\t%rdx,48(%rbx)\n\tmovq\t%r15,56(%rbx)\n\tmovq\t%rax,64(%rbx)\n\tmovq\t%r12,72(%rbx)\n\tmovq\t%rdi,80(%rbx)\n\tmovq\t%rbp,88(%rbx)\n\n\tleaq\t32-128(%rsp),%rsi\n\tleaq\t32+48(%rsp),%rbx\n\n\tmovq\t32+48(%rsp),%rdx\n\tmovq\t32+0(%rsp),%r14\n\tmovq\t32+8(%rsp),%r15\n\tmovq\t32+16(%rs
p),%rax\n\tmovq\t32+24(%rsp),%r12\n\tmovq\t32+32(%rsp),%rdi\n\tmovq\t32+40(%rsp),%rbp\n\n\n\n\tmulxq\t%r14,%r8,%r9\n\tcall\t__mulx_mont_383_nonred\n\tmovq\t32+96(%rsp),%r14\n\tleaq\t128(%rcx),%rcx\n\tmovq\t32+0(%rsp),%r8\n\tandq\t%r14,%r8\n\tmovq\t32+8(%rsp),%r9\n\tandq\t%r14,%r9\n\tmovq\t32+16(%rsp),%r10\n\tandq\t%r14,%r10\n\tmovq\t32+24(%rsp),%r11\n\tandq\t%r14,%r11\n\tmovq\t32+32(%rsp),%r13\n\tandq\t%r14,%r13\n\tandq\t32+40(%rsp),%r14\n\n\tsubq\t%r8,%rdx\n\tmovq\t0(%rcx),%r8\n\tsbbq\t%r9,%r15\n\tmovq\t8(%rcx),%r9\n\tsbbq\t%r10,%rax\n\tmovq\t16(%rcx),%r10\n\tsbbq\t%r11,%r12\n\tmovq\t24(%rcx),%r11\n\tsbbq\t%r13,%rdi\n\tmovq\t32(%rcx),%r13\n\tsbbq\t%r14,%rbp\n\tsbbq\t%r14,%r14\n\n\tandq\t%r14,%r8\n\tandq\t%r14,%r9\n\tandq\t%r14,%r10\n\tandq\t%r14,%r11\n\tandq\t%r14,%r13\n\tandq\t40(%rcx),%r14\n\n\taddq\t%r8,%rdx\n\tadcq\t%r9,%r15\n\tadcq\t%r10,%rax\n\tadcq\t%r11,%r12\n\tadcq\t%r13,%rdi\n\tadcq\t%r14,%rbp\n\n\tmovq\t%rdx,0(%rbx)\n\tmovq\t%r15,8(%rbx)\n\tmovq\t%rax,16(%rbx)\n\tmovq\t%r12,24(%rbx)\n\tmovq\t%rdi,32(%rbx)\n\tmovq\t%rbp,40(%rbx)\n\tleaq\t136(%rsp),%r8\n\tmovq\t0(%r8),%r15\n.cfi_restore\t%r15\n\tmovq\t8(%r8),%r14\n.cfi_restore\t%r14\n\tmovq\t16(%r8),%r13\n.cfi_restore\t%r13\n\tmovq\t24(%r8),%r12\n.cfi_restore\t%r12\n\tmovq\t32(%r8),%rbx\n.cfi_restore\t%rbx\n\tmovq\t40(%r8),%rbp\n.cfi_restore\t%rbp\n\tleaq\t48(%r8),%rsp\n.cfi_adjust_cfa_offset\t-136-8*6\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n"
  },
  {
    "path": "build/mach-o/sha256-armv8.S",
    "content": "//\n// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n//\n// ====================================================================\n// Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n// project.\n// ====================================================================\n//\n// sha256_block procedure for ARMv8.\n//\n// This module is stripped of scalar code paths, with rationale that all\n// known processors are NEON-capable.\n//\n// See original module at CRYPTOGAMS for further details.\n\n.comm\t___blst_platform_cap,4\n.text\n\n.align\t6\n\nLK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n.long\t0\t//terminator\n\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.align\t2\n.align\t2\n.globl\t_blst_sha256_block_armv8\n.private_extern\t_blst_sha256_block_armv8\n\n.align\t6\n_blst_sha256_block_armv8:\n\thint\t#34\nLv8_entry:\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tld1\t{v0.4s,v1.4s},[x0]\n\tad
r\tx3,LK256\n\nLoop_hw:\n\tld1\t{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64\n\tsub\tx2,x2,#1\n\tld1\t{v16.4s},[x3],#16\n\trev32\tv4.16b,v4.16b\n\trev32\tv5.16b,v5.16b\n\trev32\tv6.16b,v6.16b\n\trev32\tv7.16b,v7.16b\n\torr\tv18.16b,v0.16b,v0.16b\t\t// offload\n\torr\tv19.16b,v1.16b,v1.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 
v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n.long\t0x5e2828a4\t//sha256su0 v4.16b,v5.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e0760c4\t//sha256su1 v4.16b,v6.16b,v7.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n.long\t0x5e2828c5\t//sha256su0 v5.16b,v6.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0460e5\t//sha256su1 v5.16b,v7.16b,v4.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n.long\t0x5e2828e6\t//sha256su0 v6.16b,v7.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n.long\t0x5e056086\t//sha256su1 v6.16b,v4.16b,v5.16b\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n.long\t0x5e282887\t//sha256su0 v7.16b,v4.16b\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n.long\t0x5e0660a7\t//sha256su1 v7.16b,v5.16b,v6.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 
v1.16b,v2.16b,v16.4s\n\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tld1\t{v17.4s},[x3]\n\tadd\tv16.4s,v16.4s,v6.4s\n\tsub\tx3,x3,#64*4-16\t// rewind\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e104020\t//sha256h v0.16b,v1.16b,v16.4s\n.long\t0x5e105041\t//sha256h2 v1.16b,v2.16b,v16.4s\n\n\tadd\tv17.4s,v17.4s,v7.4s\n\torr\tv2.16b,v0.16b,v0.16b\n.long\t0x5e114020\t//sha256h v0.16b,v1.16b,v17.4s\n.long\t0x5e115041\t//sha256h2 v1.16b,v2.16b,v17.4s\n\n\tadd\tv0.4s,v0.4s,v18.4s\n\tadd\tv1.4s,v1.4s,v19.4s\n\n\tcbnz\tx2,Loop_hw\n\n\tst1\t{v0.4s,v1.4s},[x0]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\tret\n\n.globl\t_blst_sha256_block_data_order\n.private_extern\t_blst_sha256_block_data_order\n\n.align\t4\n_blst_sha256_block_data_order:\n\thint\t#34\n\tadrp\tx16,___blst_platform_cap@PAGE\n\tldr\tw16,[x16,___blst_platform_cap@PAGEOFF]\n\ttst\tw16,#1\n\tb.ne\tLv8_entry\n\n\tstp\tx29, x30, [sp, #-2*__SIZEOF_POINTER__]!\n\tmov\tx29, sp\n\tsub\tsp,sp,#16*4\n\n\tadr\tx16,LK256\n\tadd\tx2,x1,x2,lsl#6\t// len to point at the end of inp\n\n\tld1\t{v0.16b},[x1], #16\n\tld1\t{v1.16b},[x1], #16\n\tld1\t{v2.16b},[x1], #16\n\tld1\t{v3.16b},[x1], #16\n\tld1\t{v4.4s},[x16], #16\n\tld1\t{v5.4s},[x16], #16\n\tld1\t{v6.4s},[x16], #16\n\tld1\t{v7.4s},[x16], #16\n\trev32\tv0.16b,v0.16b\t\t// yes, even on\n\trev32\tv1.16b,v1.16b\t\t// big-endian\n\trev32\tv2.16b,v2.16b\n\trev32\tv3.16b,v3.16b\n\tmov\tx17,sp\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tv5.4s,v5.4s,v1.4s\n\tadd\tv6.4s,v6.4s,v2.4s\n\tst1\t{v4.4s,v5.4s},[x17], 
#32\n\tadd\tv7.4s,v7.4s,v3.4s\n\tst1\t{v6.4s,v7.4s},[x17]\n\tsub\tx17,x17,#32\n\n\tldp\tw3,w4,[x0]\n\tldp\tw5,w6,[x0,#8]\n\tldp\tw7,w8,[x0,#16]\n\tldp\tw9,w10,[x0,#24]\n\tldr\tw12,[sp,#0]\n\tmov\tw13,wzr\n\teor\tw14,w4,w5\n\tmov\tw15,wzr\n\tb\tL_00_48\n\n.align\t4\nL_00_48:\n\text\tv4.16b,v0.16b,v1.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v2.16b,v3.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v3.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv0.4s,v0.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv0.4s,v0.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv0.4s,v0.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v0.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v0.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v0.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v0.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,
w11\n\tsli\tv17.4s,v0.4s,#13\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv0.4s,v0.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v1.16b,v2.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v3.16b,v0.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v0.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv1.4s,v1.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv1.4s,v1.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\t
add\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv1.4s,v1.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\tushr\tv18.4s,v1.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v1.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v1.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v1.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v1.4s,#13\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv1.4s,v1.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\text\tv4.16b,v2.16b,v3.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text\tv7.16b,v0.16b,v1.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v1.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv2.4s,v2.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv2.4s,v2.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv2.4s,v2.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v2.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v2.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v2.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v2.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tsli\tv17.4s,v2.4s,#13\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], 
#16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv2.4s,v2.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text\tv4.16b,v3.16b,v0.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text\tv7.16b,v1.16b,v2.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v2.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv3.4s,v3.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv3.4s,v3.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv3.4s,v3.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw
5,w5,w13\n\tushr\tv18.4s,v3.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v3.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v3.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v3.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v3.4s,#13\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv3.4s,v3.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[x16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tcmp\tw12,#0\t\t\t\t// check for K256 terminator\n\tldr\tw12,[sp,#0]\n\tsub\tx17,x17,#64\n\tbne\tL_00_48\n\n\tsub\tx16,x16,#256\n\tcmp\tx1,x2\n\tmov\tx17, #-64\n\tcsel\tx17, x17, xzr, 
eq\n\tadd\tx1,x1,x17\n\tmov\tx17,sp\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v0.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv0.16b,v0.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v1.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv1.16b,v1.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v2.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv2.16b,v2.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v3.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv3.16b,v3.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tadd\tw3,w3,w15\t\t\t// h+=Sigma0(a) from the past\n\tldp\tw11,w12,[x0,#0]\n\tadd\tw3,w3,w13\t\t\t// h+=Maj(a,b,c) from the past\n\tldp\tw13,w14,[x0,#8]\n\tadd\tw3,w3,w11\t\t\t// 
accumulate\n\tadd\tw4,w4,w12\n\tldp\tw11,w12,[x0,#16]\n\tadd\tw5,w5,w13\n\tadd\tw6,w6,w14\n\tldp\tw13,w14,[x0,#24]\n\tadd\tw7,w7,w11\n\tadd\tw8,w8,w12\n\tldr\tw12,[sp,#0]\n\tstp\tw3,w4,[x0,#0]\n\tadd\tw9,w9,w13\n\tmov\tw13,wzr\n\tstp\tw5,w6,[x0,#8]\n\tadd\tw10,w10,w14\n\tstp\tw7,w8,[x0,#16]\n\teor\tw14,w4,w5\n\tstp\tw9,w10,[x0,#24]\n\tmov\tw15,wzr\n\tmov\tx17,sp\n\tb.ne\tL_00_48\n\n\tldr\tx29,[x29]\n\tadd\tsp,sp,#16*4+2*__SIZEOF_POINTER__\n\tret\n\n.globl\t_blst_sha256_emit\n.private_extern\t_blst_sha256_emit\n\n.align\t4\n_blst_sha256_emit:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n#ifndef\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n#endif\n\tstr\tw4,[x0,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[x0,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[x0,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[x0,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[x0,#0]\n\tstr\tw5,[x0,#8]\n\tstr\tw6,[x0,#16]\n\tstr\tw7,[x0,#24]\n\tret\n\n\n.globl\t_blst_sha256_bcopy\n.private_extern\t_blst_sha256_bcopy\n\n.align\t4\n_blst_sha256_bcopy:\n\thint\t#34\nLoop_bcopy:\n\tldrb\tw3,[x1],#1\n\tsub\tx2,x2,#1\n\tstrb\tw3,[x0],#1\n\tcbnz\tx2,Loop_bcopy\n\tret\n\n\n.globl\t_blst_sha256_hcopy\n.private_extern\t_blst_sha256_hcopy\n\n.align\t4\n_blst_sha256_hcopy:\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tstp\tx4,x5,[x0]\n\tstp\tx6,x7,[x0,#16]\n\tret\n\n"
  },
  {
    "path": "build/mach-o/sha256-portable-x86_64.s",
    "content": ".comm\t___blst_platform_cap,4\n.text\t\n\n.globl\t_blst_sha256_block_data_order\n\n.p2align\t4\n_blst_sha256_block_data_order:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifdef __BLST_PORTABLE__\n\ttestl\t$2,___blst_platform_cap(%rip)\n\tjnz\tL$blst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_offset\t%r15,-56\n\tshlq\t$4,%rdx\n\tsubq\t$64+24,%rsp\n\n.cfi_def_cfa\t%rsp,144\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,64+0(%rsp)\n\tmovq\t%rsi,64+8(%rsp)\n\tmovq\t%rdx,64+16(%rsp)\n\n\tmovl\t0(%rdi),%eax\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\tjmp\tL$loop\n\n.p2align\t4\nL$loop:\n\tmovl\t%ebx,%edi\n\tleaq\tK256(%rip),%rbp\n\txorl\t%ecx,%edi\n\tmovl\t0(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t0(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t4(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t
%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t4(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t8(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t8(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t12(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t12(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t16(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d
\n\taddl\t16(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t20(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t20(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r14d,%ecx\n\tmovl\t24(%rsi),%r12d\n\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t24(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t28(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t28(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t
%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r14d,%eax\n\tmovl\t32(%rsi),%r12d\n\tmovl\t%r8d,%r13d\n\tmovl\t%eax,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t32(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r14d,%r11d\n\tmovl\t36(%rsi),%r12d\n\tmovl\t%edx,%r13d\n\tmovl\t%r11d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t36(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r14d,%r10d\n\tmovl\t40(%rsi),%r12d\n\tmovl\t%ecx,%r13d\n\tmovl\t%r10d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t40(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txo
rl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r14d,%r9d\n\tmovl\t44(%rsi),%r12d\n\tmovl\t%ebx,%r13d\n\tmovl\t%r9d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t44(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r14d,%r8d\n\tmovl\t48(%rsi),%r12d\n\tmovl\t%eax,%r13d\n\tmovl\t%r8d,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t48(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r14d,%edx\n\tmovl\t52(%rsi),%r12d\n\tmovl\t%r11d,%r13d\n\tmovl\t%edx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t52(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r14d,%ecx\n\tmovl\t56(%rsi),%r12d\n
\tmovl\t%r10d,%r13d\n\tmovl\t%ecx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t56(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r14d,%ebx\n\tmovl\t60(%rsi),%r12d\n\tmovl\t%r9d,%r13d\n\tmovl\t%ebx,%r14d\n\tbswapl\t%r12d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t60(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tjmp\tL$rounds_16_xx\n.p2align\t4\nL$rounds_16_xx:\n\tmovl\t4(%rsp),%r13d\n\tmovl\t56(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t36(%rsp),%r12d\n\n\taddl\t0(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,0(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t
%eax,%r15d\n\taddl\t64(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t8(%rsp),%r13d\n\tmovl\t60(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t40(%rsp),%r12d\n\n\taddl\t4(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,4(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t68(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t12(%rsp),%r13d\n\tmovl\t0(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t44(%rsp),%r12d\n\n\taddl\t8(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,8(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t72(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,
%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t16(%rsp),%r13d\n\tmovl\t4(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t48(%rsp),%r12d\n\n\taddl\t12(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,12(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t76(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t20(%rsp),%r13d\n\tmovl\t8(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t52(%rsp),%r12d\n\n\taddl\t16(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,16(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t80(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%ed
i,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t24(%rsp),%r13d\n\tmovl\t12(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t56(%rsp),%r12d\n\n\taddl\t20(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,20(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t84(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t28(%rsp),%r13d\n\tmovl\t16(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t60(%rsp),%r12d\n\n\taddl\t24(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,24(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t88(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\tmovl\t32(%rsp),%r13d\n\tmovl\t20(%rsp),%
edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t0(%rsp),%r12d\n\n\taddl\t28(%rsp),%r12d\n\tmovl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,28(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t92(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tmovl\t36(%rsp),%r13d\n\tmovl\t24(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%eax\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t4(%rsp),%r12d\n\n\taddl\t32(%rsp),%r12d\n\tmovl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r9d,%r15d\n\n\txorl\t%r8d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r15d\n\n\tmovl\t%r12d,32(%rsp)\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r11d,%r12d\n\txorl\t%r10d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r8d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%eax,%r15d\n\taddl\t96(%rbp),%r12d\n\txorl\t%eax,%r14d\n\n\txorl\t%ebx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%ebx,%r11d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r11d\n\taddl\t%r12d,%edx\n\taddl\t%r12d,%r11d\n\tmovl\t40(%rsp),%r13d\n\tmovl\t28(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r11d\n\tmovl\t%edi,%r14d\n\trorl
\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t8(%rsp),%r12d\n\n\taddl\t36(%rsp),%r12d\n\tmovl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r8d,%edi\n\n\txorl\t%edx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%edi\n\n\tmovl\t%r12d,36(%rsp)\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r10d,%r12d\n\txorl\t%r9d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%edx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r11d,%edi\n\taddl\t100(%rbp),%r12d\n\txorl\t%r11d,%r14d\n\n\txorl\t%eax,%edi\n\trorl\t$6,%r13d\n\tmovl\t%eax,%r10d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r10d\n\taddl\t%r12d,%ecx\n\taddl\t%r12d,%r10d\n\tmovl\t44(%rsp),%r13d\n\tmovl\t32(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r10d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t12(%rsp),%r12d\n\n\taddl\t40(%rsp),%r12d\n\tmovl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%edx,%r15d\n\n\txorl\t%ecx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r15d\n\n\tmovl\t%r12d,40(%rsp)\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%r9d,%r12d\n\txorl\t%r8d,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%ecx,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r10d,%r15d\n\taddl\t104(%rbp),%r12d\n\txorl\t%r10d,%r14d\n\n\txorl\t%r11d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r11d,%r9d\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%r9d\n\taddl\t%r12d,%ebx\n\taddl\t%r12d,%r9d\n\tmovl\t48(%rsp),%r13d\n\tmovl\t36(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r9d\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%
edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t16(%rsp),%r12d\n\n\taddl\t44(%rsp),%r12d\n\tmovl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ecx,%edi\n\n\txorl\t%ebx,%r13d\n\trorl\t$9,%r14d\n\txorl\t%edx,%edi\n\n\tmovl\t%r12d,44(%rsp)\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%r8d,%r12d\n\txorl\t%edx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%r9d,%edi\n\taddl\t108(%rbp),%r12d\n\txorl\t%r9d,%r14d\n\n\txorl\t%r10d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r10d,%r8d\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%r8d\n\taddl\t%r12d,%eax\n\taddl\t%r12d,%r8d\n\tmovl\t52(%rsp),%r13d\n\tmovl\t40(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%r8d\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t20(%rsp),%r12d\n\n\taddl\t48(%rsp),%r12d\n\tmovl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%ebx,%r15d\n\n\txorl\t%eax,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r15d\n\n\tmovl\t%r12d,48(%rsp)\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%edx,%r12d\n\txorl\t%ecx,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%eax,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%r8d,%r15d\n\taddl\t112(%rbp),%r12d\n\txorl\t%r8d,%r14d\n\n\txorl\t%r9d,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%r9d,%edx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%edx\n\taddl\t%r12d,%r11d\n\taddl\t%r12d,%edx\n\tmovl\t56(%rsp),%r13d\n\tmovl\t44(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%edx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t24(
%rsp),%r12d\n\n\taddl\t52(%rsp),%r12d\n\tmovl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%eax,%edi\n\n\txorl\t%r11d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%edi\n\n\tmovl\t%r12d,52(%rsp)\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%ecx,%r12d\n\txorl\t%ebx,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%edx,%edi\n\taddl\t116(%rbp),%r12d\n\txorl\t%edx,%r14d\n\n\txorl\t%r8d,%edi\n\trorl\t$6,%r13d\n\tmovl\t%r8d,%ecx\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%ecx\n\taddl\t%r12d,%r10d\n\taddl\t%r12d,%ecx\n\tmovl\t60(%rsp),%r13d\n\tmovl\t48(%rsp),%r15d\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ecx\n\tmovl\t%r15d,%r14d\n\trorl\t$2,%r15d\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%r15d\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%r15d\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%r15d\n\taddl\t28(%rsp),%r12d\n\n\taddl\t56(%rsp),%r12d\n\tmovl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\tmovl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r11d,%r15d\n\n\txorl\t%r10d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r15d\n\n\tmovl\t%r12d,56(%rsp)\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r15d\n\n\trorl\t$5,%r13d\n\taddl\t%ebx,%r12d\n\txorl\t%eax,%r15d\n\n\trorl\t$11,%r14d\n\txorl\t%r10d,%r13d\n\taddl\t%r15d,%r12d\n\n\tmovl\t%ecx,%r15d\n\taddl\t120(%rbp),%r12d\n\txorl\t%ecx,%r14d\n\n\txorl\t%edx,%r15d\n\trorl\t$6,%r13d\n\tmovl\t%edx,%ebx\n\n\tandl\t%r15d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%edi,%ebx\n\taddl\t%r12d,%r9d\n\taddl\t%r12d,%ebx\n\tmovl\t0(%rsp),%r13d\n\tmovl\t52(%rsp),%edi\n\n\tmovl\t%r13d,%r12d\n\trorl\t$11,%r13d\n\taddl\t%r14d,%ebx\n\tmovl\t%edi,%r14d\n\trorl\t$2,%edi\n\n\txorl\t%r12d,%r13d\n\tshrl\t$3,%r12d\n\trorl\t$7,%r13d\n\txorl\t%r14d,%edi\n\tshrl\t$10,%r14d\n\n\trorl\t$17,%edi\n\txorl\t%r13d,%r12d\n\txorl\t%r14d,%edi\n\taddl\t32(%rsp),%r12d\n\n\taddl\t60(%rsp),%r12d\n\tmovl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\tmovl\t%eb
x,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r10d,%edi\n\n\txorl\t%r9d,%r13d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%edi\n\n\tmovl\t%r12d,60(%rsp)\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%edi\n\n\trorl\t$5,%r13d\n\taddl\t%eax,%r12d\n\txorl\t%r11d,%edi\n\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r13d\n\taddl\t%edi,%r12d\n\n\tmovl\t%ebx,%edi\n\taddl\t124(%rbp),%r12d\n\txorl\t%ebx,%r14d\n\n\txorl\t%ecx,%edi\n\trorl\t$6,%r13d\n\tmovl\t%ecx,%eax\n\n\tandl\t%edi,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r13d,%r12d\n\n\txorl\t%r15d,%eax\n\taddl\t%r12d,%r8d\n\taddl\t%r12d,%eax\n\tleaq\t64(%rbp),%rbp\n\tcmpb\t$0x19,3(%rbp)\n\tjnz\tL$rounds_16_xx\n\n\tmovq\t64+0(%rsp),%rdi\n\taddl\t%r14d,%eax\n\tleaq\t64(%rsi),%rsi\n\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tcmpq\t64+16(%rsp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\tL$loop\n\n\tleaq\t64+24+48(%rsp),%r11\n.cfi_def_cfa\t%r11,8\n\tmovq\t64+24(%rsp),%r15\n\tmovq\t-40(%r11),%r14\n\tmovq\t-32(%r11),%r13\n\tmovq\t-24(%r11),%r12\n\tmovq\t-16(%r11),%rbx\n\tmovq\t-8(%r11),%rbp\n.cfi_restore\t%r12\n.cfi_restore\t%r13\n.cfi_restore\t%r14\n.cfi_restore\t%r15\n.cfi_restore\t%rbp\n.cfi_restore\t%rbx\n\tleaq\t(%r11),%rsp\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n\n#ifndef 
__BLST_PORTABLE__\n.section\t__TEXT,__const\n.p2align\t6\n\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.globl\t_blst_sha256_emit\n.private_extern\t_blst_sha256_emit\n\n.p2align\t4\n_blst_sha256_emit:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rsi),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rdi)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rdi)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rdi)\n\tshrq\t$32,%r8\n\tmovl\t%r11d,28(%rdi)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rdi)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rdi)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rdi)\n\tmovl\t%r11d,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_blst_sha256_bcopy\n.private_extern\t_blst_sha256_bcopy\n\n.p2align\t4\n_blst_sha256_bcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tsubq\t%rsi,%rdi\nL$oop_bcopy:\n\tmovzbl\t(%rsi),%eax\n\tleaq\
t1(%rsi),%rsi\n\tmovb\t%al,-1(%rdi,%rsi,1)\n\tdecq\t%rdx\n\tjnz\tL$oop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_blst_sha256_hcopy\n.private_extern\t_blst_sha256_hcopy\n\n.p2align\t4\n_blst_sha256_hcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n#endif\n"
  },
  {
    "path": "build/mach-o/sha256-x86_64.s",
    "content": ".comm\t___blst_platform_cap,4\n\n.section\t__TEXT,__const\n.p2align\t6\n\nK256:\n.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n.long\t0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f\n.long\t0x03020100,0x0b0a0908,0xffffffff,0xffffffff\n.long\t0xffffffff,0xffffffff,0x03020100,0x0b0a0908\n.byte\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,64,100,111,116,45,97,115,109,0\n.text\t\n.globl\t_blst_sha256_block_data_order_shaext\n.private_extern\t_blst_sha256_block_data_order_shaext\n\n.p2align\t6\n_blst_sha256_block_data_order_shaext:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\nL$blst_sha256_block_data_order$2:\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tleaq\tK256+128(%rip),%rcx\n\tmovdqu\t(%rdi),%xmm1\n\tmovdqu\t16(%rdi),%xmm2\n\tmovdqa\t256-128(%rcx),%xmm7\n\n\tpshufd\t$0x1b,%xmm1,%xmm0\n\tpshufd\t$0xb1,%xmm1,%xmm1\n\tpshufd\t$0x1b,%xmm2,%xmm2\n\tmovdqa\t%xmm7,%xmm8\n.byte\t102,15,58,15,202,8\n\tpunpcklqdq\t%xmm0,%xmm2\n\tjmp\tL$oop_shaext\n\
n.p2align\t4\nL$oop_shaext:\n\tmovdqu\t(%rsi),%xmm3\n\tmovdqu\t16(%rsi),%xmm4\n\tmovdqu\t32(%rsi),%xmm5\n.byte\t102,15,56,0,223\n\tmovdqu\t48(%rsi),%xmm6\n\n\tmovdqa\t0-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t102,15,56,0,231\n\tmovdqa\t%xmm2,%xmm10\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tnop\n\tmovdqa\t%xmm1,%xmm9\n.byte\t15,56,203,202\n\n\tmovdqa\t16-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t102,15,56,0,239\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tleaq\t64(%rsi),%rsi\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\n\tmovdqa\t32-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t102,15,56,0,247\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\n\tmovdqa\t48-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t64-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t80-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t96-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t112-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%
xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t128-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t144-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n\tnop\n\tpaddd\t%xmm7,%xmm6\n.byte\t15,56,204,220\n.byte\t15,56,203,202\n\tmovdqa\t160-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,205,245\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm6,%xmm7\n.byte\t102,15,58,15,253,4\n\tnop\n\tpaddd\t%xmm7,%xmm3\n.byte\t15,56,204,229\n.byte\t15,56,203,202\n\tmovdqa\t176-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n.byte\t15,56,205,222\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm3,%xmm7\n.byte\t102,15,58,15,254,4\n\tnop\n\tpaddd\t%xmm7,%xmm4\n.byte\t15,56,204,238\n.byte\t15,56,203,202\n\tmovdqa\t192-128(%rcx),%xmm0\n\tpaddd\t%xmm3,%xmm0\n.byte\t15,56,205,227\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm4,%xmm7\n.byte\t102,15,58,15,251,4\n\tnop\n\tpaddd\t%xmm7,%xmm5\n.byte\t15,56,204,243\n.byte\t15,56,203,202\n\tmovdqa\t208-128(%rcx),%xmm0\n\tpaddd\t%xmm4,%xmm0\n.byte\t15,56,205,236\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tmovdqa\t%xmm5,%xmm7\n.byte\t102,15,58,15,252,4\n.byte\t15,56,203,202\n\tpaddd\t%xmm7,%xmm6\n\n\tmovdqa\t224-128(%rcx),%xmm0\n\tpaddd\t%xmm5,%xmm0\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n.byte\t15,56,205,245\n\tmovdqa\t%xmm8,%xmm7\n.byte\t15,56,203,202\n\n\tmovdqa\t240-128(%rcx),%xmm0\n\tpaddd\t%xmm6,%xmm0\n\tnop\n.byte\t15,56,203,209\n\tpshufd\t$0x0e,%xmm0,%xmm0\n\tdecq\t%rdx\n\tnop\n.byte\t15,56,203,202\n\n\tpaddd\t%xmm10,%xmm2\n\tpaddd\t%xmm9,%xmm1\n\tjnz\tL$oop_shaext\n\n\tpshufd\t$0xb1,%xmm2,%xmm2\n\tpshufd\t$0x1b,%xmm1,%xmm7\n\tpshufd\t$0xb1,%xmm1,%xmm1\
n\tpunpckhqdq\t%xmm2,%xmm1\n.byte\t102,15,58,15,215,8\n\n\tmovdqu\t%xmm1,(%rdi)\n\tmovdqu\t%xmm2,16(%rdi)\n.cfi_def_cfa_register\t%rsp\n\tpopq\t%rbp\n.cfi_adjust_cfa_offset\t-8\n.cfi_restore\t%rbp\n\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_blst_sha256_block_data_order\n.private_extern\t_blst_sha256_block_data_order\n\n.p2align\t6\n_blst_sha256_block_data_order:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n\n\tpushq\t%rbp\n.cfi_adjust_cfa_offset\t8\n.cfi_offset\t%rbp,-16\n\tmovq\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifndef\t__SGX_LVI_HARDENING__\n\ttestl\t$2,___blst_platform_cap(%rip)\n\tjnz\tL$blst_sha256_block_data_order$2\n#endif\n\tpushq\t%rbx\n.cfi_offset\t%rbx,-24\n\tpushq\t%r12\n.cfi_offset\t%r12,-32\n\tpushq\t%r13\n.cfi_offset\t%r13,-40\n\tpushq\t%r14\n.cfi_offset\t%r14,-48\n\tpushq\t%r15\n.cfi_offset\t%r15,-56\n\tshlq\t$4,%rdx\n\tsubq\t$24,%rsp\n\n\tleaq\t(%rsi,%rdx,4),%rdx\n\tmovq\t%rdi,-64(%rbp)\n\n\tmovq\t%rdx,-48(%rbp)\n\n\n\tleaq\t-64(%rsp),%rsp\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovl\t0(%rdi),%eax\n\tandq\t$-64,%rsp\n\tmovl\t4(%rdi),%ebx\n\tmovl\t8(%rdi),%ecx\n\tmovl\t12(%rdi),%edx\n\tmovl\t16(%rdi),%r8d\n\tmovl\t20(%rdi),%r9d\n\tmovl\t24(%rdi),%r10d\n\tmovl\t28(%rdi),%r11d\n\n\n\tjmp\tL$loop_ssse3\n.p2align\t4\nL$loop_ssse3:\n\tmovdqa\tK256+256(%rip),%xmm7\n\tmovq\t%rsi,-56(%rbp)\n\tmovdqu\t0(%rsi),%xmm0\n\tmovdqu\t16(%rsi),%xmm1\n\tmovdqu\t32(%rsi),%xmm2\n.byte\t102,15,56,0,199\n\tmovdqu\t48(%rsi),%xmm3\n\tleaq\tK256(%rip),%rsi\n.byte\t102,15,56,0,207\n\tmovdqa\t0(%rsi),%xmm4\n\tmovdqa\t16(%rsi),%xmm5\n.byte\t102,15,56,0,215\n\tpaddd\t%xmm0,%xmm4\n\tmovdqa\t32(%rsi),%xmm6\n.byte\t102,15,56,0,223\n\tmovdqa\t48(%rsi),%xmm7\n\tpaddd\t%xmm1,%xmm5\n\tpaddd\t%xmm2,%xmm6\n\tpaddd\t%xmm3,%xmm7\n\tmovdqa\t%xmm4,0(%rsp)\n\tmovl\t%eax,%r14d\n\tmovdqa\t%xmm5,16(%rsp)\n\tmovl\t%ebx,%edi\n\tmovdqa\t%xmm6,32(%rsp)\n\txorl\t%ecx,%edi\n\tmov
dqa\t%xmm7,48(%rsp)\n\tmovl\t%r8d,%r13d\n\tjmp\tL$ssse3_00_47\n\n.p2align\t4\nL$ssse3_00_47:\n\tsubq\t$-64,%rsi\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm1,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm3,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,224,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,250,4\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm3,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n
\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm0\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm0,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t0(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm0\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm0,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,0(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm2,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm0,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,225,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,251,4\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm0,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xm
m6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm1\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm1,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t16(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpaddd\t%xmm7,%xmm1\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm1,%xmm6\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,16(%rsp)
\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm3,%xmm4\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\tmovdqa\t%xmm1,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n.byte\t102,15,58,15,226,4\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n.byte\t102,15,58,15,248,4\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\tpshufd\t$250,%xmm1,%xmm7\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%r11d,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\tpslld\t$11,%xmm5\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\tpaddd\t%xmm4,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm2\n\tandl\t%r15d,%edi\n\txorl\
t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\tpshufd\t$80,%xmm2,%xmm7\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\tmovdqa\t32(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\tpaddd\t%xmm7,%xmm2\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tpaddd\t%xmm2,%xmm6\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\tmovdqa\t%xmm6,32(%rsp)\n\trorl\t$14,%r13d\n\tmovdqa\t%xmm0,%xmm4\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\tmovdqa\t%xmm2,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n.byte\t102,15,58,15,227,4\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n.byte\t102,15,58,15,249,4\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm4,%xmm5\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\tmovdqa\t%xmm4,%xmm6\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\tpsrld\t$3,%xmm4\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\tpsrld\t$7,%xmm6\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\tpshufd\t$250,%xmm2,%xmm7\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tpslld\t$14,%xmm5\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm4\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\tpsrld\t$11,%xmm6\n\txorl\t%edx,%r14d\n\tpxor\t%xmm5,%xmm4\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\tpslld\t$11,%xmm5\
n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\tpxor\t%xmm6,%xmm4\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\tmovdqa\t%xmm7,%xmm6\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\tpxor\t%xmm5,%xmm4\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\tpsrld\t$10,%xmm7\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\tpaddd\t%xmm4,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\tpsrlq\t$17,%xmm6\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\tpsrlq\t$2,%xmm6\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\tpshufd\t$128,%xmm7,%xmm7\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\tpsrldq\t$8,%xmm7\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tpaddd\t%xmm7,%xmm3\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\tpshufd\t$80,%xmm3,%xmm7\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\tmovdqa\t%xmm7,%xmm6\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\tpsrld\t$10,%xmm7\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tpsrlq\t$17,%xmm6\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\tpxor\t%xmm6,%xmm7\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tpsrlq\t$2,%xmm6\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tpxor\t%xmm6,%xmm7\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\tpshufd\t$8,%xmm7,%xmm7\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\tmovdqa\t48(%rsi),%xmm6\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\tpslldq\t$8,%xmm7\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\tpaddd\t%xmm7,%xmm3\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tpaddd\t%xmm3,%xmm6\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovdqa\t%xmm6,48(%rsp)\n\tcmpb\t$0,67(%rsi)\n\tjne\tL$ssse3_00_47\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\t
rorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t0(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t4(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t8(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t12(%rsp),%r8d\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d
\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n\taddl\t16(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t20(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t24(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t28(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\tad
dl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%eax\n\tmovl\t%r9d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r8d,%r13d\n\txorl\t%r10d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%eax,%r14d\n\tandl\t%r8d,%r12d\n\txorl\t%r8d,%r13d\n\taddl\t32(%rsp),%r11d\n\tmovl\t%eax,%r15d\n\txorl\t%r10d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ebx,%r15d\n\taddl\t%r12d,%r11d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%eax,%r14d\n\taddl\t%r13d,%r11d\n\txorl\t%ebx,%edi\n\trorl\t$2,%r14d\n\taddl\t%r11d,%edx\n\taddl\t%edi,%r11d\n\tmovl\t%edx,%r13d\n\taddl\t%r11d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r11d\n\tmovl\t%r8d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%edx,%r13d\n\txorl\t%r9d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r11d,%r14d\n\tandl\t%edx,%r12d\n\txorl\t%edx,%r13d\n\taddl\t36(%rsp),%r10d\n\tmovl\t%r11d,%edi\n\txorl\t%r9d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%eax,%edi\n\taddl\t%r12d,%r10d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r11d,%r14d\n\taddl\t%r13d,%r10d\n\txorl\t%eax,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r10d,%ecx\n\taddl\t%r15d,%r10d\n\tmovl\t%ecx,%r13d\n\taddl\t%r10d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r10d\n\tmovl\t%edx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ecx,%r13d\n\txorl\t%r8d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r10d,%r14d\n\tandl\t%ecx,%r12d\n\txorl\t%ecx,%r13d\n\taddl\t40(%rsp),%r9d\n\tmovl\t%r10d,%r15d\n\txorl\t%r8d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r11d,%r15d\n\taddl\t%r12d,%r9d\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r10d,%r14d\n\taddl\t%r13d,%r9d\n\txorl\t%r11d,%edi\n\trorl\t$2,%r14d\n\taddl\t%r9d,%ebx\n\taddl\t%edi,%r9d\n\tmovl\t%ebx,%r13d\n\taddl\t%r9d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r9d\n\tmovl\t%ecx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%ebx,%r13d\n\txorl\t%edx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r9d,%r14d\n\tandl\t%ebx,%r12d\n\txorl\t%ebx,%r13d\n\taddl\t44(%rsp),%r8d\n\tmovl\t%r9d,%edi\n\txorl\t%edx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r10d,%edi\n\taddl\t%r12d,%r8d\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%r9d,%r14d\n\taddl\t%r13d,%r
8d\n\txorl\t%r10d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%r8d,%eax\n\taddl\t%r15d,%r8d\n\tmovl\t%eax,%r13d\n\taddl\t%r8d,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%r8d\n\tmovl\t%ebx,%r12d\n\trorl\t$9,%r14d\n\txorl\t%eax,%r13d\n\txorl\t%ecx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%r8d,%r14d\n\tandl\t%eax,%r12d\n\txorl\t%eax,%r13d\n\taddl\t48(%rsp),%edx\n\tmovl\t%r8d,%r15d\n\txorl\t%ecx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r9d,%r15d\n\taddl\t%r12d,%edx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%r8d,%r14d\n\taddl\t%r13d,%edx\n\txorl\t%r9d,%edi\n\trorl\t$2,%r14d\n\taddl\t%edx,%r11d\n\taddl\t%edi,%edx\n\tmovl\t%r11d,%r13d\n\taddl\t%edx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%edx\n\tmovl\t%eax,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r11d,%r13d\n\txorl\t%ebx,%r12d\n\trorl\t$5,%r13d\n\txorl\t%edx,%r14d\n\tandl\t%r11d,%r12d\n\txorl\t%r11d,%r13d\n\taddl\t52(%rsp),%ecx\n\tmovl\t%edx,%edi\n\txorl\t%ebx,%r12d\n\trorl\t$11,%r14d\n\txorl\t%r8d,%edi\n\taddl\t%r12d,%ecx\n\trorl\t$6,%r13d\n\tandl\t%edi,%r15d\n\txorl\t%edx,%r14d\n\taddl\t%r13d,%ecx\n\txorl\t%r8d,%r15d\n\trorl\t$2,%r14d\n\taddl\t%ecx,%r10d\n\taddl\t%r15d,%ecx\n\tmovl\t%r10d,%r13d\n\taddl\t%ecx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ecx\n\tmovl\t%r11d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r10d,%r13d\n\txorl\t%eax,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ecx,%r14d\n\tandl\t%r10d,%r12d\n\txorl\t%r10d,%r13d\n\taddl\t56(%rsp),%ebx\n\tmovl\t%ecx,%r15d\n\txorl\t%eax,%r12d\n\trorl\t$11,%r14d\n\txorl\t%edx,%r15d\n\taddl\t%r12d,%ebx\n\trorl\t$6,%r13d\n\tandl\t%r15d,%edi\n\txorl\t%ecx,%r14d\n\taddl\t%r13d,%ebx\n\txorl\t%edx,%edi\n\trorl\t$2,%r14d\n\taddl\t%ebx,%r9d\n\taddl\t%edi,%ebx\n\tmovl\t%r9d,%r13d\n\taddl\t%ebx,%r14d\n\trorl\t$14,%r13d\n\tmovl\t%r14d,%ebx\n\tmovl\t%r10d,%r12d\n\trorl\t$9,%r14d\n\txorl\t%r9d,%r13d\n\txorl\t%r11d,%r12d\n\trorl\t$5,%r13d\n\txorl\t%ebx,%r14d\n\tandl\t%r9d,%r12d\n\txorl\t%r9d,%r13d\n\taddl\t60(%rsp),%eax\n\tmovl\t%ebx,%edi\n\txorl\t%r11d,%r12d\n\trorl\t$11,%r14d\n\txorl\t%ecx,%edi\n\taddl\t%r12d,%eax\n\trorl\t$6,%r13d\n\
tandl\t%edi,%r15d\n\txorl\t%ebx,%r14d\n\taddl\t%r13d,%eax\n\txorl\t%ecx,%r15d\n\trorl\t$2,%r14d\n\taddl\t%eax,%r8d\n\taddl\t%r15d,%eax\n\tmovl\t%r8d,%r13d\n\taddl\t%eax,%r14d\n\tmovq\t-64(%rbp),%rdi\n\tmovl\t%r14d,%eax\n\tmovq\t-56(%rbp),%rsi\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\taddl\t0(%rdi),%eax\n\taddl\t4(%rdi),%ebx\n\taddl\t8(%rdi),%ecx\n\taddl\t12(%rdi),%edx\n\taddl\t16(%rdi),%r8d\n\taddl\t20(%rdi),%r9d\n\taddl\t24(%rdi),%r10d\n\taddl\t28(%rdi),%r11d\n\n\tleaq\t64(%rsi),%rsi\n\tcmpq\t-48(%rbp),%rsi\n\n\tmovl\t%eax,0(%rdi)\n\tmovl\t%ebx,4(%rdi)\n\tmovl\t%ecx,8(%rdi)\n\tmovl\t%edx,12(%rdi)\n\tmovl\t%r8d,16(%rdi)\n\tmovl\t%r9d,20(%rdi)\n\tmovl\t%r10d,24(%rdi)\n\tmovl\t%r11d,28(%rdi)\n\tjb\tL$loop_ssse3\n\n\txorps\t%xmm0,%xmm0\n\tmovaps\t%xmm0,0(%rsp)\n\tmovaps\t%xmm0,16(%rsp)\n\tmovaps\t%xmm0,32(%rsp)\n\tmovaps\t%xmm0,48(%rsp)\n\tmovq\t-40(%rbp),%r15\n\tmovq\t-32(%rbp),%r14\n\tmovq\t-24(%rbp),%r13\n\tmovq\t-16(%rbp),%r12\n\tmovq\t-8(%rbp),%rbx\n\tmovq\t%rbp,%rsp\n.cfi_def_cfa_register\t%rsp\n\tpopq\t%rbp\n.cfi_adjust_cfa_offset\t-8\n.cfi_restore\t%rbp\n.cfi_restore\t%r12\n.cfi_restore\t%r13\n.cfi_restore\t%r14\n.cfi_restore\t%r15\n.cfi_restore\t%rbx\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\t\n\n.globl\t_blst_sha256_emit\n.private_extern\t_blst_sha256_emit\n\n.p2align\t4\n_blst_sha256_emit:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tbswapq\t%r8\n\tmovq\t24(%rsi),%r11\n\tbswapq\t%r9\n\tmovl\t%r8d,4(%rdi)\n\tbswapq\t%r10\n\tmovl\t%r9d,12(%rdi)\n\tbswapq\t%r11\n\tmovl\t%r10d,20(%rdi)\n\tshrq\t$32,%r8\n\tmovl\t%r11d,28(%rdi)\n\tshrq\t$32,%r9\n\tmovl\t%r8d,0(%rdi)\n\tshrq\t$32,%r10\n\tmovl\t%r9d,8(%rdi)\n\tshrq\t$32,%r11\n\tmovl\t%r10d,16(%rdi)\n\tmovl\t%r11d,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud
2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_blst_sha256_bcopy\n.private_extern\t_blst_sha256_bcopy\n\n.p2align\t4\n_blst_sha256_bcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tsubq\t%rsi,%rdi\nL$oop_bcopy:\n\tmovzbl\t(%rsi),%eax\n\tleaq\t1(%rsi),%rsi\n\tmovb\t%al,-1(%rdi,%rsi,1)\n\tdecq\t%rdx\n\tjnz\tL$oop_bcopy\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n\n.globl\t_blst_sha256_hcopy\n.private_extern\t_blst_sha256_hcopy\n\n.p2align\t4\n_blst_sha256_hcopy:\n.cfi_startproc\n\t.byte\t0xf3,0x0f,0x1e,0xfa\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovq\t0(%rsi),%r8\n\tmovq\t8(%rsi),%r9\n\tmovq\t16(%rsi),%r10\n\tmovq\t24(%rsi),%r11\n\tmovq\t%r8,0(%rdi)\n\tmovq\t%r9,8(%rdi)\n\tmovq\t%r10,16(%rdi)\n\tmovq\t%r11,24(%rdi)\n\t\n#ifdef\t__SGX_LVI_HARDENING__\n\tpopq\t%rdx\n\tlfence\n\tjmpq\t*%rdx\n\tud2\n#else\n\t.byte\t0xf3,0xc3\n#endif\n.cfi_endproc\n\n"
  },
  {
    "path": "build/refresh.sh",
    "content": "#!/bin/sh\n\nHERE=`dirname $0`\ncd \"${HERE}\"\n\nPERL=${PERL:-perl}\n\nfor pl in ../src/asm/*-x86_64.pl; do\n    s=`basename $pl .pl`.asm\n    expr $s : '.*portable' > /dev/null || (set -x; ${PERL} $pl masm > win64/$s)\n    s=`basename $pl .pl`.s\n    (set -x; ${PERL} $pl elf > elf/$s)\n    (set -x; ${PERL} $pl mingw64 > coff/$s)\n    (set -x; ${PERL} $pl macosx > mach-o/$s)\ndone\n\nfor pl in ../src/asm/*-armv8.pl; do\n    s=`basename $pl .pl`.asm\n    (set -x; ${PERL} $pl win64 > win64/$s)\n    s=`basename $pl .pl`.S\n    (set -x; ${PERL} $pl linux64 > elf/$s)\n    (set -x; ${PERL} $pl coff64 > coff/$s)\n    (set -x; ${PERL} $pl ios64 > mach-o/$s)\n    (set -x; ${PERL} $pl cheri64 > cheri/$s)\ndone\n\n( cd ../bindings;\n  echo \"LIBRARY blst\"\n  echo\n  echo \"EXPORTS\"\n  cc -E blst.h | \\\n  ${PERL} -ne '{ (/(blst_[\\w]+)\\s*\\(/ || /(BLS12_[\\w]+);/) &&  print \"\\t$1\\n\" }'\n  echo\n) > win64/blst.def\n\nif which bindgen > /dev/null 2>&1; then\n  ( cd ../bindings; set -x;\n    bindgen --opaque-type blst_pairing \\\n            --opaque-type blst_uniq \\\n            --with-derive-default \\\n            --with-derive-eq \\\n            --rustified-enum BLST.\\* \\\n        blst.h -- -D__BLST_RUST_BINDGEN__ \\\n    | ${PERL} ../build/bindings_trim.pl > rust/src/bindings.rs\n  )\nelse\n    echo \"Install Rust bindgen with 'cargo install bindgen-cli'\" 1>&2\n    exit 1\nfi\n"
  },
  {
    "path": "build/srcroot.go",
    "content": "package blst\n\nimport (\n    \"path/filepath\"\n    \"runtime\"\n)\n\nvar SrcRoot string\n\nfunc init() {\n    if _, self, _, ok := runtime.Caller(0); ok {\n        SrcRoot = filepath.Dir(filepath.Dir(self))\n    }\n}\n"
  },
  {
    "path": "build/win64/add_mod_256-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|add_mod_256|[FUNC]\n\tALIGN\t32\n|add_mod_256| PROC\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tadds\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tadcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tadcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx8,x8,x16\n\tcsello\tx9,x9,x17\n\tcsello\tx10,x10,x1\n\tstp\tx8,x9,[x0]\n\tcsello\tx11,x11,x2\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_3_mod_256|[FUNC]\n\tALIGN\t32\n|mul_by_3_mod_256| PROC\n\thint\t#34\n\tldp\tx12,x13,[x1]\n\tldp\tx14,x15,[x1,#16]\n\n\tadds\tx8,x12,x12\n\tldp\tx4,x5,[x2]\n\tadcs\tx9,x13,x13\n\tldp\tx6,x7,[x2,#16]\n\tadcs\tx10,x14,x14\n\tadcs\tx11,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx8,x8,x16\n\tcsello\tx9,x9,x17\n\tcsello\tx10,x10,x1\n\tcsello\tx11,x11,x2\n\n\tadds\tx8,x8,x12\n\tadcs\tx9,x9,x13\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx8,x8,x16\n\tcsello\tx9,x9,x17\n\tcsello\tx10,x10,x1\n\tstp\tx8,x9,[x0]\n\tcsello\tx11,x11,x2\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|lshift_mod_256|[FUNC]\n\tALIGN\t32\n|lshift_mod_256| 
PROC\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n|$Loop_lshift_mod_256|\n\tadds\tx8,x8,x8\n\tsub\tx2,x2,#1\n\tadcs\tx9,x9,x9\n\tadcs\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx12,x8,x4\n\tsbcs\tx13,x9,x5\n\tsbcs\tx14,x10,x6\n\tsbcs\tx15,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx8,x8,x12\n\tcsello\tx9,x9,x13\n\tcsello\tx10,x10,x14\n\tcsello\tx11,x11,x15\n\n\tcbnz\tx2,|$Loop_lshift_mod_256|\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|rshift_mod_256|[FUNC]\n\tALIGN\t32\n|rshift_mod_256| PROC\n\thint\t#34\n\tldp\tx8,x9,[x1]\n\tldp\tx10,x11,[x1,#16]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\n|$Loop_rshift|\n\tadds\tx12,x8,x4\n\tsub\tx2,x2,#1\n\tadcs\tx13,x9,x5\n\tadcs\tx14,x10,x6\n\tadcs\tx15,x11,x7\n\tadc\tx3,xzr,xzr\n\ttst\tx8,#1\n\n\tcselne\tx12,x12,x8\n\tcselne\tx13,x13,x9\n\tcselne\tx14,x14,x10\n\tcselne\tx15,x15,x11\n\tcselne\tx3,x3,xzr\n\n\textr\tx8,x13,x12,#1\n\textr\tx9,x14,x13,#1\n\textr\tx10,x15,x14,#1\n\textr\tx11,x3,x15,#1\n\n\tcbnz\tx2,|$Loop_rshift|\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|cneg_mod_256|[FUNC]\n\tALIGN\t32\n|cneg_mod_256| PROC\n\tldp\tx8,x9,[x1]\n\tldp\tx4,x5,[x3]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx12,x4,x8\n\tldp\tx6,x7,[x3,#16]\n\torr\tx4,x8,x9\n\tsbcs\tx13,x5,x9\n\torr\tx5,x10,x11\n\tsbcs\tx14,x6,x10\n\torr\tx3,x4,x5\n\tsbc\tx15,x7,x11\n\n\tcmp\tx3,#0\n\tcsetmne\tx3\n\tands\tx2,x2,x3\n\n\tcseleq\tx8,x8,x12\n\tcseleq\tx9,x9,x13\n\tcseleq\tx10,x10,x14\n\tstp\tx8,x9,[x0]\n\tcseleq\tx11,x11,x15\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sub_mod_256|[FUNC]\n\tALIGN\t32\n|sub_mod_256| 
PROC\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\n\tldp\tx10,x11,[x1,#16]\n\tsubs\tx8,x8,x12\n\tldp\tx14,x15,[x2,#16]\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tstp\tx8,x9,[x0]\n\tadc\tx11,x11,x7\n\tstp\tx10,x11,[x0,#16]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|check_mod_256|[FUNC]\n\tALIGN\t32\n|check_mod_256| PROC\n\tldp\tx8,x9,[x0]\n\tldp\tx10,x11,[x0,#16]\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\n if :def:\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n endif\n\n\tsubs\txzr,x8,x4\n\tsbcs\txzr,x9,x5\n\torr\tx8,x8,x9\n\tsbcs\txzr,x10,x6\n\torr\tx8,x8,x10\n\tsbcs\txzr,x11,x7\n\torr\tx8,x8,x11\n\tsbc\tx1,xzr,xzr\n\n\tcmp\tx8,#0\n\tmov\tx0,#1\n\tcselne\tx0,x0,xzr\n\tand\tx0,x0,x1\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|add_n_check_mod_256|[FUNC]\n\tALIGN\t32\n|add_n_check_mod_256| PROC\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n if :def:\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n endif\n\n\tadds\tx8,x8,x12\n\tldp\tx4,x5,[x3]\n\tadcs\tx9,x9,x13\n\tldp\tx6,x7,[x3,#16]\n\tadcs\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x8,x4\n\tsbcs\tx17,x9,x5\n\tsbcs\tx1,x10,x6\n\tsbcs\tx2,x11,x7\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx8,x8,x16\n\tcsello\tx9,x9,x17\n\tcsello\tx10,x10,x1\n\tcsello\tx11,x11,x2\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n if :def:\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcselne\tx0,x17,xzr\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sub_n_check_mod_256|[FUNC]\n\tALIGN\t32\n|sub_n_check_mod_256| 
PROC\n\tldp\tx8,x9,[x1]\n\tldp\tx12,x13,[x2]\n\tldp\tx10,x11,[x1,#16]\n\tldp\tx14,x15,[x2,#16]\n\n if :def:\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx12,x12\n\trev\tx9,x9\n\trev\tx13,x13\n\trev\tx10,x10\n\trev\tx14,x14\n\trev\tx11,x11\n\trev\tx15,x15\n endif\n\n\tsubs\tx8,x8,x12\n\tsbcs\tx9,x9,x13\n\tldp\tx4,x5,[x3]\n\tsbcs\tx10,x10,x14\n\tldp\tx6,x7,[x3,#16]\n\tsbcs\tx11,x11,x15\n\tsbc\tx3,xzr,xzr\n\n\tand\tx4,x4,x3\n\tand\tx5,x5,x3\n\tadds\tx8,x8,x4\n\tand\tx6,x6,x3\n\tadcs\tx9,x9,x5\n\tand\tx7,x7,x3\n\tadcs\tx10,x10,x6\n\tadc\tx11,x11,x7\n\n\torr\tx16, x8, x9\n\torr\tx17, x10, x11\n\torr\tx16, x16, x17\n\n if :def:\t__AARCH64EB__\n\trev\tx8,x8\n\trev\tx9,x9\n\trev\tx10,x10\n\trev\tx11,x11\n endif\n\n\tstp\tx8,x9,[x0]\n\tstp\tx10,x11,[x0,#16]\n\n\tmov\tx17, #1\n\tcmp\tx16, #0\n\tcselne\tx0,x17,xzr\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/add_mod_256-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tadd_mod_256\n\n\nALIGN\t32\nadd_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_add_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_add_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n$L$oaded_a_add_mod_256::\n\tadd\tr8,QWORD PTR[rdx]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tmov\trax,r8\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\trsi,r9\n\tadc\tr11,QWORD PTR[24+rdx]\n\tsbb\trdx,rdx\n\n\tmov\trbx,r10\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tmov\trbp,r11\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,rax\n\tcmovc\tr9,rsi\n\tmov\tQWORD PTR[rdi],r8\n\tcmovc\tr10,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tcmovc\tr11,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_add_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_add_mod_256::\nadd_mod_256\tENDP\n\n\nPUBLIC\tmul_by_3_mod_256\n\n\nALIGN\t32\nmul_by_3_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_3_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n$L$SEH_body_mul_by_3_mod_256::\n\n\n\tmov\trcx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD 
PTR[16+rsi]\n\tmov\trdx,rsi\n\tmov\tr11,QWORD PTR[24+rsi]\n\n\tcall\t__lshift_mod_256\n\tmov\tr12,QWORD PTR[rsp]\n\n\tjmp\t$L$oaded_a_add_mod_256\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_mul_by_3_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_3_mod_256::\nmul_by_3_mod_256\tENDP\n\n\nALIGN\t32\n__lshift_mod_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tadd\tr8,r8\n\tadc\tr9,r9\n\tmov\trax,r8\n\tadc\tr10,r10\n\tmov\trsi,r9\n\tadc\tr11,r11\n\tsbb\tr12,r12\n\n\tmov\trbx,r10\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tmov\trbp,r11\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,0\n\n\tcmovc\tr8,rax\n\tcmovc\tr9,rsi\n\tcmovc\tr10,rbx\n\tcmovc\tr11,rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trax\n\tlfence\n\tjmp\trax\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__lshift_mod_256\tENDP\n\n\nPUBLIC\tlshift_mod_256\n\n\nALIGN\t32\nlshift_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_lshift_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n$L$SEH_body_lshift_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n$L$oop_lshift_mod_256::\n\tcall\t__lshift_mod_256\n\tdec\tedx\n\tjnz\t$L$oop_lshift_mod_256\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tmov\tr12,QWORD PTR[rsp]\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_lshift_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 
epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_lshift_mod_256::\nlshift_mod_256\tENDP\n\n\nPUBLIC\trshift_mod_256\n\n\nALIGN\t32\nrshift_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_rshift_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_rshift_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trbp,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n$L$oop_rshift_mod_256::\n\tmov\tr8,rbp\n\tand\trbp,1\n\tmov\trax,QWORD PTR[rcx]\n\tneg\trbp\n\tmov\trsi,QWORD PTR[8+rcx]\n\tmov\trbx,QWORD PTR[16+rcx]\n\n\tand\trax,rbp\n\tand\trsi,rbp\n\tand\trbx,rbp\n\tand\trbp,QWORD PTR[24+rcx]\n\n\tadd\tr8,rax\n\tadc\tr9,rsi\n\tadc\tr10,rbx\n\tadc\tr11,rbp\n\tsbb\trax,rax\n\n\tshr\tr8,1\n\tmov\trbp,r9\n\tshr\tr9,1\n\tmov\trbx,r10\n\tshr\tr10,1\n\tmov\trsi,r11\n\tshr\tr11,1\n\n\tshl\trbp,63\n\tshl\trbx,63\n\tor\trbp,r8\n\tshl\trsi,63\n\tor\tr9,rbx\n\tshl\trax,63\n\tor\tr10,rsi\n\tor\tr11,rax\n\n\tdec\tedx\n\tjnz\t$L$oop_rshift_mod_256\n\n\tmov\tQWORD PTR[rdi],rbp\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_rshift_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_rshift_mod_256::\nrshift_mod_256\tENDP\n\n\nPUBLIC\tcneg_mod_256\n\n\nALIGN\t32\ncneg_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_cneg_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n$L$SEH_body_cneg_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr12,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr8,r12\n\tmov\tr11,QWORD PTR[24+rsi]\n\tor\tr12,r9\n\tor\tr12,r10\n\tor\tr12,r11\n\tmov\trbp,-1\n\n\tmov\trax,QWORD PTR[rcx]\n\tcmovnz\tr12,rbp\n\tmov\trsi,QWORD PTR[8+rcx]\n\tmov\trbx,QWORD PTR[16+rcx]\n\tand\trax,r12\n\tmov\trbp,QWORD PTR[24+rcx]\n\tand\trsi,r12\n\tand\trbx,r12\n\tand\trbp,r12\n\n\tsub\trax,r8\n\tsbb\trsi,r9\n\tsbb\trbx,r10\n\tsbb\trbp,r11\n\n\tor\trdx,rdx\n\n\tcmovz\trax,r8\n\tcmovz\trsi,r9\n\tmov\tQWORD PTR[rdi],rax\n\tcmovz\trbx,r10\n\tmov\tQWORD PTR[8+rdi],rsi\n\tcmovz\trbp,r11\n\tmov\tQWORD PTR[16+rdi],rbx\n\tmov\tQWORD PTR[24+rdi],rbp\n\n\tmov\tr12,QWORD PTR[rsp]\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_cneg_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_cneg_mod_256::\ncneg_mod_256\tENDP\n\n\nPUBLIC\tsub_mod_256\n\n\nALIGN\t32\nsub_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sub_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_sub_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\trax,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\trsi,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD 
PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[24+rcx]\n\tsbb\trdx,rdx\n\n\tand\trax,rdx\n\tand\trsi,rdx\n\tand\trbx,rdx\n\tand\trbp,rdx\n\n\tadd\tr8,rax\n\tadc\tr9,rsi\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_sub_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sub_mod_256::\nsub_mod_256\tENDP\n\n\nPUBLIC\tcheck_mod_256\n\n\nALIGN\t32\ncheck_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_check_mod_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trax,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\tmov\tr10,QWORD PTR[16+rdi]\n\tmov\tr11,QWORD PTR[24+rdi]\n\n\tmov\tr8,rax\n\tor\trax,r9\n\tor\trax,r10\n\tor\trax,r11\n\n\tsub\tr8,QWORD PTR[rsi]\n\tsbb\tr9,QWORD PTR[8+rsi]\n\tsbb\tr10,QWORD PTR[16+rsi]\n\tsbb\tr11,QWORD PTR[24+rsi]\n\tsbb\trsi,rsi\n\n\tmov\trdx,1\n\tcmp\trax,0\n\tcmovne\trax,rdx\n\tand\trax,rsi\n$L$SEH_epilogue_check_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_check_mod_256::\ncheck_mod_256\tENDP\n\n\nPUBLIC\tadd_n_check_mod_256\n\n\nALIGN\t32\nadd_n_check_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_add_n_check_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_add_n_check_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n\tadd\tr8,QWORD PTR[rdx]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tmov\trax,r8\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\trsi,r9\n\tadc\tr11,QWORD PTR[24+rdx]\n\tsbb\trdx,rdx\n\n\tmov\trbx,r10\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tmov\trbp,r11\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,rax\n\tcmovc\tr9,rsi\n\tmov\tQWORD PTR[rdi],r8\n\tcmovc\tr10,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tcmovc\tr11,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tor\tr8,r9\n\tor\tr10,r11\n\tor\tr8,r10\n\tmov\trax,1\n\tcmovz\trax,r8\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_add_n_check_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_add_n_check_mod_256::\nadd_n_check_mod_256\tENDP\n\n\nPUBLIC\tsub_n_check_mod_256\n\n\nALIGN\t32\nsub_n_check_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sub_n_check_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_sub_n_check_mod_256::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\trax,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\trsi,QWORD 
PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[24+rcx]\n\tsbb\trdx,rdx\n\n\tand\trax,rdx\n\tand\trsi,rdx\n\tand\trbx,rdx\n\tand\trbp,rdx\n\n\tadd\tr8,rax\n\tadc\tr9,rsi\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tor\tr8,r9\n\tor\tr10,r11\n\tor\tr8,r10\n\tmov\trax,1\n\tcmovz\trax,r8\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_sub_n_check_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sub_n_check_mod_256::\nsub_n_check_mod_256\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_add_mod_256\n\tDD\timagerel $L$SEH_body_add_mod_256\n\tDD\timagerel $L$SEH_info_add_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_add_mod_256\n\tDD\timagerel $L$SEH_epilogue_add_mod_256\n\tDD\timagerel $L$SEH_info_add_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_add_mod_256\n\tDD\timagerel $L$SEH_end_add_mod_256\n\tDD\timagerel $L$SEH_info_add_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_end_mul_by_3_mod_256\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_lshift_mod_256\n\tDD\timagerel $L$SEH_body_lshift_mod_256\n\tDD\timagerel $L$SEH_info_lshift_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_lshift_mod_256\n\tDD\timagerel $L$SEH_epilogue_lshift_mod_256\n\tDD\timagerel 
$L$SEH_info_lshift_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_lshift_mod_256\n\tDD\timagerel $L$SEH_end_lshift_mod_256\n\tDD\timagerel $L$SEH_info_lshift_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_rshift_mod_256\n\tDD\timagerel $L$SEH_body_rshift_mod_256\n\tDD\timagerel $L$SEH_info_rshift_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_rshift_mod_256\n\tDD\timagerel $L$SEH_epilogue_rshift_mod_256\n\tDD\timagerel $L$SEH_info_rshift_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_rshift_mod_256\n\tDD\timagerel $L$SEH_end_rshift_mod_256\n\tDD\timagerel $L$SEH_info_rshift_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_cneg_mod_256\n\tDD\timagerel $L$SEH_body_cneg_mod_256\n\tDD\timagerel $L$SEH_info_cneg_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_cneg_mod_256\n\tDD\timagerel $L$SEH_epilogue_cneg_mod_256\n\tDD\timagerel $L$SEH_info_cneg_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_cneg_mod_256\n\tDD\timagerel $L$SEH_end_cneg_mod_256\n\tDD\timagerel $L$SEH_info_cneg_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_sub_mod_256\n\tDD\timagerel $L$SEH_body_sub_mod_256\n\tDD\timagerel $L$SEH_info_sub_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_sub_mod_256\n\tDD\timagerel $L$SEH_epilogue_sub_mod_256\n\tDD\timagerel $L$SEH_info_sub_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_sub_mod_256\n\tDD\timagerel $L$SEH_end_sub_mod_256\n\tDD\timagerel $L$SEH_info_sub_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_epilogue_check_mod_256\n\tDD\timagerel $L$SEH_end_check_mod_256\n\tDD\timagerel $L$SEH_info_check_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_add_n_check_mod_256\n\tDD\timagerel $L$SEH_body_add_n_check_mod_256\n\tDD\timagerel $L$SEH_info_add_n_check_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_add_n_check_mod_256\n\tDD\timagerel $L$SEH_epilogue_add_n_check_mod_256\n\tDD\timagerel $L$SEH_info_add_n_check_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_add_n_check_mod_256\n\tDD\timagerel $L$SEH_end_add_n_check_mod_256\n\tDD\timagerel 
$L$SEH_info_add_n_check_mod_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_body_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_info_sub_n_check_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_epilogue_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_info_sub_n_check_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_end_sub_n_check_mod_256\n\tDD\timagerel $L$SEH_info_sub_n_check_mod_256_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_add_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_add_mod_256_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_add_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_3_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_3_mod_256_body::\nDB\t1,0,11,0\nDB\t000h,0c4h,000h,000h\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\n$L$SEH_info_mul_by_3_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_lshift_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_lshift_mod_256_body::\nDB\t1,0,11,0\nDB\t000h,0c4h,000h,000h\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\n$L$SEH_info_lshift_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_rshi
ft_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_rshift_mod_256_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_rshift_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_cneg_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_cneg_mod_256_body::\nDB\t1,0,11,0\nDB\t000h,0c4h,000h,000h\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\n$L$SEH_info_cneg_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sub_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sub_mod_256_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sub_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_check_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_add_n_check_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_add_n_check_mod_256_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_add_n_check_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$
SEH_info_sub_n_check_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sub_n_check_mod_256_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sub_n_check_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/add_mod_384-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|add_mod_384|[FUNC]\n\tALIGN\t32\n|add_mod_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__add_mod_384| PROC\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n|__add_mod_384_ab_are_loaded|\n\tadds\tx10,x10,x16\n\tadcs\tx11,x11,x17\n\tadcs\tx12,x12,x19\n\tadcs\tx13,x13,x20\n\tadcs\tx14,x14,x21\n\tadcs\tx15,x15,x22\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx10,x10,x16\n\tcsello\tx11,x11,x17\n\tcsello\tx12,x12,x19\n\tcsello\tx13,x13,x20\n\tcsello\tx14,x14,x21\n\tcsello\tx15,x15,x22\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|add_mod_384x|[FUNC]\n\tALIGN\t32\n|add_mod_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__add_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|rshift_mod_384|[FUNC]\n\tALIGN\t32\n|rshift_mod_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n|$Loop_rshift_mod_384|\n\tsub\tx2,x2,#1\n\tbl\t__rshift_mod_384\n\tcbnz\tx2,|$Loop_rshift_mod_384|\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__rshift_mod_384| PROC\n\tsbfx\tx22,x10,#0,#1\n\tand\tx16,x22,x4\n\tand\tx17,x22,x5\n\tadds\tx10,x10,x16\n\tand\tx19,x22,x6\n\tadcs\tx11,x11,x17\n\tand\tx20,x22,x7\n\tadcs\tx12,x12,x19\n\tand\tx21,x22,x8\n\tadcs\tx13,x13,x20\n\tand\tx22,x22,x9\n\tadcs\tx14,x14,x21\n\textr\tx10,x11,x10,#1\n\tadcs\tx15,x15,x22\n\textr\tx11,x12,x11,#1\n\tadc\tx22,xzr,xzr\n\textr\tx12,x13,x12,#1\n\textr\tx13,x14,x13,#1\n\textr\tx14,x15,x14,#1\n\textr\tx15,x22,x15,#1\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|div_by_2_mod_384|[FUNC]\n\tALIGN\t32\n|div_by_2_mod_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|lshift_mod_384|[FUNC]\n\tALIGN\t32\n|lshift_mod_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n|$Loop_lshift_mod_384|\n\tsub\tx2,x2,#1\n\tbl\t__lshift_mod_384\n\tcbnz\tx2,|$Loop_lshift_mod_384|\n\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__lshift_mod_384| PROC\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx16,x10,x4\n\tsbcs\tx17,x11,x5\n\tsbcs\tx19,x12,x6\n\tsbcs\tx20,x13,x7\n\tsbcs\tx21,x14,x8\n\tsbcs\tx22,x15,x9\n\tsbcs\txzr,x3,xzr\n\n\tcsello\tx10,x10,x16\n\tcsello\tx11,x11,x17\n\tcsello\tx12,x12,x19\n\tcsello\tx13,x13,x20\n\tcsello\tx14,x14,x21\n\tcsello\tx15,x15,x22\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_3_mod_384|[FUNC]\n\tALIGN\t32\n|mul_by_3_mod_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_8_mod_384|[FUNC]\n\tALIGN\t32\n|mul_by_8_mod_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_3_mod_384x|[FUNC]\n\tALIGN\t32\n|mul_by_3_mod_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\tx16,x17,[x1,#48]\n\tldp\tx19,x20,[x1,#64]\n\tldp\tx21,x22,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_8_mod_384x|[FUNC]\n\tALIGN\t32\n|mul_by_8_mod_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx14,x15,[x1,#32]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|cneg_mod_384|[FUNC]\n\tALIGN\t32\n|cneg_mod_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldp\tx4,x5,[x3]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx6,x7,[x3,#16]\n\n\tsubs\tx16,x4,x10\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx8,x9,[x3,#32]\n\torr\tx3,x10,x11\n\tsbcs\tx17,x5,x11\n\torr\tx3,x3,x12\n\tsbcs\tx19,x6,x12\n\torr\tx3,x3,x13\n\tsbcs\tx20,x7,x13\n\torr\tx3,x3,x14\n\tsbcs\tx21,x8,x14\n\torr\tx3,x3,x15\n\tsbc\tx22,x9,x15\n\n\tcmp\tx3,#0\n\tcsetmne\tx3\n\tands\tx2,x2,x3\n\n\tcseleq\tx10,x10,x16\n\tcseleq\tx11,x11,x17\n\tcseleq\tx12,x12,x19\n\tcseleq\tx13,x13,x20\n\tstp\tx10,x11,[x0]\n\tcseleq\tx14,x14,x21\n\tstp\tx12,x13,[x0,#16]\n\tcseleq\tx15,x15,x22\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sub_mod_384|[FUNC]\n\tALIGN\t32\n|sub_mod_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\tstp\tx14,x15,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__sub_mod_384| PROC\n\tldp\tx10,x11,[x1]\n\tldp\tx16,x17,[x2]\n\tldp\tx12,x13,[x1,#16]\n\tldp\tx19,x20,[x2,#16]\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx21,x22,[x2,#32]\n\n\tsubs\tx10,x10,x16\n\tsbcs\tx11,x11,x17\n\tsbcs\tx12,x12,x19\n\tsbcs\tx13,x13,x20\n\tsbcs\tx14,x14,x21\n\tsbcs\tx15,x15,x22\n\tsbc\tx3,xzr,xzr\n\n\tand\tx16,x4,x3\n\tand\tx17,x5,x3\n\tadds\tx10,x10,x16\n\tand\tx19,x6,x3\n\tadcs\tx11,x11,x17\n\tand\tx20,x7,x3\n\tadcs\tx12,x12,x19\n\tand\tx21,x8,x3\n\tadcs\tx13,x13,x20\n\tand\tx22,x9,x3\n\tadcs\tx14,x14,x21\n\tadc\tx15,x15,x22\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sub_mod_384x|[FUNC]\n\tALIGN\t32\n|sub_mod_384x| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x3]\n\tldp\tx6,x7,[x3,#16]\n\tldp\tx8,x9,[x3,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\tx10,x11,[x0]\n\tadd\tx1,x1,#48\n\tstp\tx12,x13,[x0,#16]\n\tadd\tx2,x2,#48\n\tstp\tx14,x15,[x0,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_by_1_plus_i_mod_384x|[FUNC]\n\tALIGN\t32\n|mul_by_1_plus_i_mod_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx4,x5,[x2]\n\tldp\tx6,x7,[x2,#16]\n\tldp\tx8,x9,[x2,#32]\n\tadd\tx2,x1,#48\n\n\tbl\t__sub_mod_384\n\n\tldp\tx16,x17,[x1]\n\tldp\tx19,x20,[x1,#16]\n\tldp\tx21,x22,[x1,#32]\n\tstp\tx10,x11,[x0]\n\tldp\tx10,x11,[x1,#48]\n\tstp\tx12,x13,[x0,#16]\n\tldp\tx12,x13,[x1,#64]\n\tstp\tx14,x15,[x0,#32]\n\tldp\tx14,x15,[x1,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tx30,[sp,#__SIZEOF_POINTER__]\n\n\tstp\tx10,x11,[x0,#48]\n\tstp\tx12,x13,[x0,#64]\n\tstp\tx14,x15,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sgn0_pty_mod_384|[FUNC]\n\tALIGN\t32\n|sgn0_pty_mod_384| PROC\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx0,x10,#1\n\tadds\tx10,x10,x10\n\tadcs\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx3,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx3,x3,xzr\n\n\tmvn\tx3,x3\n\tand\tx3,x3,#2\n\torr\tx0,x0,x3\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sgn0_pty_mod_384x|[FUNC]\n\tALIGN\t32\n|sgn0_pty_mod_384x| 
PROC\n\thint\t#34\n\tldp\tx10,x11,[x0]\n\tldp\tx12,x13,[x0,#16]\n\tldp\tx14,x15,[x0,#32]\n\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tldp\tx8,x9,[x1,#32]\n\n\tand\tx2,x10,#1\n\torr\tx3,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx3,x3,x12\n\tadcs\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tldp\tx10,x11,[x0,#48]\n\tldp\tx12,x13,[x0,#64]\n\tldp\tx14,x15,[x0,#80]\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx2,x2,x16\n\n\tand\tx0,x10,#1\n\torr\tx1,x10,x11\n\tadds\tx10,x10,x10\n\torr\tx1,x1,x12\n\tadcs\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadc\tx16,xzr,xzr\n\n\tsubs\tx10,x10,x4\n\tsbcs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbc\tx16,x16,xzr\n\n\tmvn\tx16,x16\n\tand\tx16,x16,#2\n\torr\tx0,x0,x16\n\n\tcmp\tx3,#0\n\tcseleq\tx3,x0,x2\n\n\tcmp\tx1,#0\n\tcselne\tx1,x0,x2\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\n\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_32|[FUNC]\n\tALIGN\t32\n|vec_select_32| PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d}, [x1]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [x2]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_48|[FUNC]\n\tALIGN\t32\n|vec_select_48| PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_96|[FUNC]\n\tALIGN\t32\n|vec_select_96| 
PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_192|[FUNC]\n\tALIGN\t32\n|vec_select_192| PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_144|[FUNC]\n\tALIGN\t32\n|vec_select_144| PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, 
v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_select_288|[FUNC]\n\tALIGN\t32\n|vec_select_288| PROC\n\thint\t#34\n\tdup\tv6.2d, x3\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tld1\t{v0.2d, v1.2d, v2.2d}, [x1],#48\n\tbit\tv17.16b, v20.16b, v6.16b\n\tld1\t{v3.2d, v4.2d, v5.2d}, [x2],#48\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0],#48\n\tbit\tv0.16b, v3.16b, v6.16b\n\tld1\t{v16.2d, v17.2d, v18.2d}, [x1],#48\n\tbit\tv1.16b, v4.16b, v6.16b\n\tld1\t{v19.2d, v20.2d, v21.2d}, [x2],#48\n\tbit\tv2.16b, v5.16b, v6.16b\n\tst1\t{v0.2d, v1.2d, v2.2d}, [x0],#48\n\tbit\tv16.16b, v19.16b, v6.16b\n\tbit\tv17.16b, v20.16b, v6.16b\n\tbit\tv18.16b, v21.16b, v6.16b\n\tst1\t{v16.2d, v17.2d, v18.2d}, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_prefetch|[FUNC]\n\tALIGN\t32\n|vec_prefetch| PROC\n\thint\t#34\n\tadd\tx1, x1, x0\n\tsub\tx1, x1, #1\n\tmov\tx2, #64\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, 
x1\n\tcselhi\tx0,x1,x0\n\tcselhi\tx2,xzr,x2\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcselhi\tx0,x1,x0\n\tcselhi\tx2,xzr,x2\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcselhi\tx0,x1,x0\n\tcselhi\tx2,xzr,x2\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcselhi\tx0,x1,x0\n\tcselhi\tx2,xzr,x2\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcselhi\tx0,x1,x0\n\tcselhi\tx2,xzr,x2\n\tprfm\tpldl1keep, [x0]\n\tadd\tx0, x0, x2\n\tcmp\tx0, x1\n\tcselhi\tx0,x1,x0\n\tprfm\tpldl1keep, [x0]\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_is_zero_16x|[FUNC]\n\tALIGN\t32\n|vec_is_zero_16x| PROC\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tlsr\tx1, x1, #4\n\tsub\tx1, x1, #1\n\tcbz\tx1, |$Loop_is_zero_done|\n\n|$Loop_is_zero|\n\tld1\t{v1.2d}, [x0], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\tx1, x1, #1\n\tcbnz\tx1, |$Loop_is_zero|\n\n|$Loop_is_zero_done|\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcseleq\tx0,x0,xzr\n\tret\n\tENDP\n\n\n\tEXPORT\t|vec_is_equal_16x|[FUNC]\n\tALIGN\t32\n|vec_is_equal_16x| PROC\n\thint\t#34\n\tld1\t{v0.2d}, [x0], #16\n\tld1\t{v1.2d}, [x1], #16\n\tlsr\tx2, x2, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\n|$Loop_is_equal|\n\tsub\tx2, x2, #1\n\tcbz\tx2, |$Loop_is_equal_done|\n\tld1\t{v1.2d}, [x0], #16\n\tld1\t{v2.2d}, [x1], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\t|$Loop_is_equal|\n\tnop\n\n|$Loop_is_equal_done|\n\tdup\tv1.2d, v0.d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcseleq\tx0,x0,xzr\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/add_mod_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tadd_mod_384\n\n\nALIGN\t32\nadd_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_add_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_add_mod_384::\n\n\n\tcall\t__add_mod_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_add_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_add_mod_384::\nadd_mod_384\tENDP\n\n\nALIGN\t32\n__add_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n__add_mod_384_a_is_loaded::\n\tadd\tr8,QWORD PTR[rdx]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\tr14,r8\n\tadc\tr11,QWORD PTR[24+rdx]\n\tmov\tr15,r9\n\tadc\tr12,QWORD PTR[32+rdx]\n\tmov\trax,r10\n\tadc\tr13,QWORD PTR[40+rdx]\n\tmov\trbx,r11\n\tsbb\trdx,rdx\n\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tmov\trbp,r12\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rcx]\n\tmov\trsi,r13\n\tsbb\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tmov\tQWORD PTR[rdi],r8\n\tcmovc\tr11,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tcmovc\tr12,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tcmovc\tr13,rsi\n\tmov\tQWORD 
PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__add_mod_384\tENDP\n\nPUBLIC\tadd_mod_384x\n\n\nALIGN\t32\nadd_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_add_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,24\n\n$L$SEH_body_add_mod_384x::\n\n\n\tmov\tQWORD PTR[rsp],rsi\n\tmov\tQWORD PTR[8+rsp],rdx\n\tlea\trsi,QWORD PTR[48+rsi]\n\tlea\trdx,QWORD PTR[48+rdx]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__add_mod_384\n\n\tmov\trsi,QWORD PTR[rsp]\n\tmov\trdx,QWORD PTR[8+rsp]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\n\tcall\t__add_mod_384\n\n\tmov\tr15,QWORD PTR[((24+0))+rsp]\n\n\tmov\tr14,QWORD PTR[((24+8))+rsp]\n\n\tmov\tr13,QWORD PTR[((24+16))+rsp]\n\n\tmov\tr12,QWORD PTR[((24+24))+rsp]\n\n\tmov\trbx,QWORD PTR[((24+32))+rsp]\n\n\tmov\trbp,QWORD PTR[((24+40))+rsp]\n\n\tlea\trsp,QWORD PTR[((24+48))+rsp]\n\n$L$SEH_epilogue_add_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_add_mod_384x::\nadd_mod_384x\tENDP\n\n\nPUBLIC\trshift_mod_384\n\n\nALIGN\t32\nrshift_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_rshift_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_rshift_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD 
PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n$L$oop_rshift_mod_384::\n\tcall\t__rshift_mod_384\n\tdec\tedx\n\tjnz\t$L$oop_rshift_mod_384\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_rshift_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_rshift_mod_384::\nrshift_mod_384\tENDP\n\n\nALIGN\t32\n__rshift_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trsi,1\n\tmov\tr14,QWORD PTR[rcx]\n\tand\trsi,r8\n\tmov\tr15,QWORD PTR[8+rcx]\n\tneg\trsi\n\tmov\trax,QWORD PTR[16+rcx]\n\tand\tr14,rsi\n\tmov\trbx,QWORD PTR[24+rcx]\n\tand\tr15,rsi\n\tmov\trbp,QWORD PTR[32+rcx]\n\tand\trax,rsi\n\tand\trbx,rsi\n\tand\trbp,rsi\n\tand\trsi,QWORD PTR[40+rcx]\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tadc\trax,r10\n\tadc\trbx,r11\n\tadc\trbp,r12\n\tadc\trsi,r13\n\tsbb\tr13,r13\n\n\tshr\tr14,1\n\tmov\tr8,r15\n\tshr\tr15,1\n\tmov\tr9,rax\n\tshr\trax,1\n\tmov\tr10,rbx\n\tshr\trbx,1\n\tmov\tr11,rbp\n\tshr\trbp,1\n\tmov\tr12,rsi\n\tshr\trsi,1\n\tshl\tr8,63\n\tshl\tr9,63\n\tor\tr8,r14\n\tshl\tr10,63\n\tor\tr9,r15\n\tshl\tr11,63\n\tor\tr10,rax\n\tshl\tr12,63\n\tor\tr11,rbx\n\tshl\tr13,63\n\tor\tr12,rbp\n\tor\tr13,rsi\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr14\n\tlfence\n\tjmp\tr14\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__rshift_mod_384\tENDP\n\nPUBLIC\tdiv_by_2_mod_384\n\n\nALIGN\t32\ndiv_by_2_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_div_by_2_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_div_by_2_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\trcx,rdx\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tcall\t__rshift_mod_384\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_div_by_2_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_div_by_2_mod_384::\ndiv_by_2_mod_384\tENDP\n\n\nPUBLIC\tlshift_mod_384\n\n\nALIGN\t32\nlshift_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_lshift_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_lshift_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD 
PTR[40+rsi]\n\n$L$oop_lshift_mod_384::\n\tadd\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tmov\tr14,r8\n\tadc\tr11,r11\n\tmov\tr15,r9\n\tadc\tr12,r12\n\tmov\trax,r10\n\tadc\tr13,r13\n\tmov\trbx,r11\n\tsbb\trdi,rdi\n\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tmov\trbp,r12\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rcx]\n\tmov\trsi,r13\n\tsbb\tr13,QWORD PTR[40+rcx]\n\tsbb\trdi,0\n\n\tmov\trdi,QWORD PTR[rsp]\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tcmovc\tr11,rbx\n\tcmovc\tr12,rbp\n\tcmovc\tr13,rsi\n\n\tdec\tedx\n\tjnz\t$L$oop_lshift_mod_384\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_lshift_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_lshift_mod_384::\nlshift_mod_384\tENDP\n\n\nALIGN\t32\n__lshift_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tadd\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tmov\tr14,r8\n\tadc\tr11,r11\n\tmov\tr15,r9\n\tadc\tr12,r12\n\tmov\trax,r10\n\tadc\tr13,r13\n\tmov\trbx,r11\n\tsbb\trdx,rdx\n\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tmov\trbp,r12\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rcx]\n\tmov\trsi,r13\n\tsbb\tr13,QWORD 
PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tcmovc\tr11,rbx\n\tcmovc\tr12,rbp\n\tcmovc\tr13,rsi\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__lshift_mod_384\tENDP\n\n\nPUBLIC\tmul_by_3_mod_384\n\n\nALIGN\t32\nmul_by_3_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_3_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trsi\n\n$L$SEH_body_mul_by_3_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\trcx,rdx\n\n\tcall\t__lshift_mod_384\n\n\tmov\trdx,QWORD PTR[rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mul_by_3_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_3_mod_384::\nmul_by_3_mod_384\tENDP\n\nPUBLIC\tmul_by_8_mod_384\n\n\nALIGN\t32\nmul_by_8_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_8_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_mul_by_8_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\trcx,rdx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mul_by_8_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_8_mod_384::\nmul_by_8_mod_384\tENDP\n\n\nPUBLIC\tmul_by_3_mod_384x\n\n\nALIGN\t32\nmul_by_3_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_3_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trsi\n\n$L$SEH_body_mul_by_3_mod_384x::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\trcx,rdx\n\n\tcall\t__lshift_mod_384\n\n\tmov\trdx,QWORD 
PTR[rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\trsi,QWORD PTR[rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[48+rsi]\n\tmov\tr9,QWORD PTR[56+rsi]\n\tmov\tr10,QWORD PTR[64+rsi]\n\tmov\tr11,QWORD PTR[72+rsi]\n\tmov\tr12,QWORD PTR[80+rsi]\n\tmov\tr13,QWORD PTR[88+rsi]\n\n\tcall\t__lshift_mod_384\n\n\tmov\trdx,8*6\n\tadd\trdx,QWORD PTR[rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mul_by_3_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_3_mod_384x::\nmul_by_3_mod_384x\tENDP\n\nPUBLIC\tmul_by_8_mod_384x\n\n\nALIGN\t32\nmul_by_8_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_8_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trsi\n\n$L$SEH_body_mul_by_8_mod_384x::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\trcx,rdx\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\trsi,QWORD PTR[rsp]\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD 
PTR[40+rdi],r13\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[((48+0))+rsi]\n\tmov\tr9,QWORD PTR[((48+8))+rsi]\n\tmov\tr10,QWORD PTR[((48+16))+rsi]\n\tmov\tr11,QWORD PTR[((48+24))+rsi]\n\tmov\tr12,QWORD PTR[((48+32))+rsi]\n\tmov\tr13,QWORD PTR[((48+40))+rsi]\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\tQWORD PTR[((48+0))+rdi],r8\n\tmov\tQWORD PTR[((48+8))+rdi],r9\n\tmov\tQWORD PTR[((48+16))+rdi],r10\n\tmov\tQWORD PTR[((48+24))+rdi],r11\n\tmov\tQWORD PTR[((48+32))+rdi],r12\n\tmov\tQWORD PTR[((48+40))+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mul_by_8_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_8_mod_384x::\nmul_by_8_mod_384x\tENDP\n\n\nPUBLIC\tcneg_mod_384\n\n\nALIGN\t32\ncneg_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_cneg_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdx\n\n$L$SEH_body_cneg_mod_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr8,rdx\n\tmov\tr11,QWORD PTR[24+rsi]\n\tor\trdx,r9\n\tmov\tr12,QWORD PTR[32+rsi]\n\tor\trdx,r10\n\tmov\tr13,QWORD PTR[40+rsi]\n\tor\trdx,r11\n\tmov\trsi,-1\n\tor\trdx,r12\n\tor\trdx,r13\n\n\tmov\tr14,QWORD PTR[rcx]\n\tcmovnz\trdx,rsi\n\tmov\tr15,QWORD PTR[8+rcx]\n\tmov\trax,QWORD PTR[16+rcx]\n\tand\tr14,rdx\n\tmov\trbx,QWORD 
PTR[24+rcx]\n\tand\tr15,rdx\n\tmov\trbp,QWORD PTR[32+rcx]\n\tand\trax,rdx\n\tmov\trsi,QWORD PTR[40+rcx]\n\tand\trbx,rdx\n\tmov\trcx,QWORD PTR[rsp]\n\tand\trbp,rdx\n\tand\trsi,rdx\n\n\tsub\tr14,r8\n\tsbb\tr15,r9\n\tsbb\trax,r10\n\tsbb\trbx,r11\n\tsbb\trbp,r12\n\tsbb\trsi,r13\n\n\tor\trcx,rcx\n\n\tcmovz\tr14,r8\n\tcmovz\tr15,r9\n\tcmovz\trax,r10\n\tmov\tQWORD PTR[rdi],r14\n\tcmovz\trbx,r11\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovz\trbp,r12\n\tmov\tQWORD PTR[16+rdi],rax\n\tcmovz\trsi,r13\n\tmov\tQWORD PTR[24+rdi],rbx\n\tmov\tQWORD PTR[32+rdi],rbp\n\tmov\tQWORD PTR[40+rdi],rsi\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_cneg_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_cneg_mod_384::\ncneg_mod_384\tENDP\n\n\nPUBLIC\tsub_mod_384\n\n\nALIGN\t32\nsub_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sub_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sub_mod_384::\n\n\n\tcall\t__sub_mod_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sub_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD 
PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sub_mod_384::\nsub_mod_384\tENDP\n\n\nALIGN\t32\n__sub_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\tr15,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trax,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbx,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trbp,QWORD PTR[32+rcx]\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tmov\trsi,QWORD PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr14,rdx\n\tand\tr15,rdx\n\tand\trax,rdx\n\tand\trbx,rdx\n\tand\trbp,rdx\n\tand\trsi,rdx\n\n\tadd\tr8,r14\n\tadc\tr9,r15\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,rax\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,rbx\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\tr12,rbp\n\tmov\tQWORD PTR[24+rdi],r11\n\tadc\tr13,rsi\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__sub_mod_384\tENDP\n\nPUBLIC\tsub_mod_384x\n\n\nALIGN\t32\nsub_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sub_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,24\n\n$L$SEH_body_sub_mod_384x::\n\n\n\tmov\tQWORD PTR[rsp],rsi\n\tmov\tQWORD PTR[8+rsp],rdx\n\tlea\trsi,QWORD PTR[48+rsi]\n\tlea\trdx,QWORD PTR[48+rdx]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__sub_mod_384\n\n\tmov\trsi,QWORD PTR[rsp]\n\tmov\trdx,QWORD PTR[8+rsp]\n\tlea\trdi,QWORD 
PTR[((-48))+rdi]\n\tcall\t__sub_mod_384\n\n\tmov\tr15,QWORD PTR[((24+0))+rsp]\n\n\tmov\tr14,QWORD PTR[((24+8))+rsp]\n\n\tmov\tr13,QWORD PTR[((24+16))+rsp]\n\n\tmov\tr12,QWORD PTR[((24+24))+rsp]\n\n\tmov\trbx,QWORD PTR[((24+32))+rsp]\n\n\tmov\trbp,QWORD PTR[((24+40))+rsp]\n\n\tlea\trsp,QWORD PTR[((24+48))+rsp]\n\n$L$SEH_epilogue_sub_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sub_mod_384x::\nsub_mod_384x\tENDP\nPUBLIC\tmul_by_1_plus_i_mod_384x\n\n\nALIGN\t32\nmul_by_1_plus_i_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_by_1_plus_i_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,56\n\n$L$SEH_body_mul_by_1_plus_i_mod_384x::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,r8\n\tadd\tr8,QWORD PTR[48+rsi]\n\tmov\tr15,r9\n\tadc\tr9,QWORD PTR[56+rsi]\n\tmov\trax,r10\n\tadc\tr10,QWORD PTR[64+rsi]\n\tmov\trbx,r11\n\tadc\tr11,QWORD PTR[72+rsi]\n\tmov\trcx,r12\n\tadc\tr12,QWORD PTR[80+rsi]\n\tmov\trbp,r13\n\tadc\tr13,QWORD PTR[88+rsi]\n\tmov\tQWORD PTR[48+rsp],rdi\n\tsbb\trdi,rdi\n\n\tsub\tr14,QWORD PTR[48+rsi]\n\tsbb\tr15,QWORD PTR[56+rsi]\n\tsbb\trax,QWORD PTR[64+rsi]\n\tsbb\trbx,QWORD PTR[72+rsi]\n\tsbb\trcx,QWORD PTR[80+rsi]\n\tsbb\trbp,QWORD PTR[88+rsi]\n\tsbb\trsi,rsi\n\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tr8,QWORD PTR[rdx]\n\tmov\tQWORD PTR[8+rsp],r9\n\tmov\tr9,QWORD PTR[8+rdx]\n\tmov\tQWORD PTR[16+rsp],r10\n\tmov\tr10,QWORD PTR[16+rdx]\n\tmov\tQWORD PTR[24+rsp],r11\n\tmov\tr11,QWORD 
PTR[24+rdx]\n\tmov\tQWORD PTR[32+rsp],r12\n\tand\tr8,rsi\n\tmov\tr12,QWORD PTR[32+rdx]\n\tmov\tQWORD PTR[40+rsp],r13\n\tand\tr9,rsi\n\tmov\tr13,QWORD PTR[40+rdx]\n\tand\tr10,rsi\n\tand\tr11,rsi\n\tand\tr12,rsi\n\tand\tr13,rsi\n\tmov\trsi,QWORD PTR[48+rsp]\n\n\tadd\tr14,r8\n\tmov\tr8,QWORD PTR[rsp]\n\tadc\tr15,r9\n\tmov\tr9,QWORD PTR[8+rsp]\n\tadc\trax,r10\n\tmov\tr10,QWORD PTR[16+rsp]\n\tadc\trbx,r11\n\tmov\tr11,QWORD PTR[24+rsp]\n\tadc\trcx,r12\n\tmov\tr12,QWORD PTR[32+rsp]\n\tadc\trbp,r13\n\tmov\tr13,QWORD PTR[40+rsp]\n\n\tmov\tQWORD PTR[rsi],r14\n\tmov\tr14,r8\n\tmov\tQWORD PTR[8+rsi],r15\n\tmov\tQWORD PTR[16+rsi],rax\n\tmov\tr15,r9\n\tmov\tQWORD PTR[24+rsi],rbx\n\tmov\tQWORD PTR[32+rsi],rcx\n\tmov\trax,r10\n\tmov\tQWORD PTR[40+rsi],rbp\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\trbx,r11\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trcx,r12\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trbp,r13\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tsbb\trdi,0\n\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tmov\tQWORD PTR[48+rsi],r8\n\tcmovc\tr11,rbx\n\tmov\tQWORD PTR[56+rsi],r9\n\tcmovc\tr12,rcx\n\tmov\tQWORD PTR[64+rsi],r10\n\tcmovc\tr13,rbp\n\tmov\tQWORD PTR[72+rsi],r11\n\tmov\tQWORD PTR[80+rsi],r12\n\tmov\tQWORD PTR[88+rsi],r13\n\n\tmov\tr15,QWORD PTR[((56+0))+rsp]\n\n\tmov\tr14,QWORD PTR[((56+8))+rsp]\n\n\tmov\tr13,QWORD PTR[((56+16))+rsp]\n\n\tmov\tr12,QWORD PTR[((56+24))+rsp]\n\n\tmov\trbx,QWORD PTR[((56+32))+rsp]\n\n\tmov\trbp,QWORD PTR[((56+40))+rsp]\n\n\tlea\trsp,QWORD PTR[((56+48))+rsp]\n\n$L$SEH_epilogue_mul_by_1_plus_i_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_by_1_plus_i_mod_384x::\nmul_by_1_plus_i_mod_384x\tENDP\nPUBLIC\tsgn0_pty_mod_384\n\n\nALIGN\t32\nsgn0_pty_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0_pty_mod_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n$L$SEH_body_sgn0_pty_mod_384::\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\tmov\tr10,QWORD PTR[16+rdi]\n\tmov\tr11,QWORD PTR[24+rdi]\n\tmov\trcx,QWORD PTR[32+rdi]\n\tmov\trdx,QWORD PTR[40+rdi]\n\n\txor\trax,rax\n\tmov\trdi,r8\n\tadd\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trcx,rcx\n\tadc\trdx,rdx\n\tadc\trax,0\n\n\tsub\tr8,QWORD PTR[rsi]\n\tsbb\tr9,QWORD PTR[8+rsi]\n\tsbb\tr10,QWORD PTR[16+rsi]\n\tsbb\tr11,QWORD PTR[24+rsi]\n\tsbb\trcx,QWORD PTR[32+rsi]\n\tsbb\trdx,QWORD PTR[40+rsi]\n\tsbb\trax,0\n\n\tnot\trax\n\tand\trdi,1\n\tand\trax,2\n\tor\trax,rdi\n\n$L$SEH_epilogue_sgn0_pty_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0_pty_mod_384::\nsgn0_pty_mod_384\tENDP\n\nPUBLIC\tsgn0_pty_mod_384x\n\n\nALIGN\t32\nsgn0_pty_mod_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0_pty_mod_384x::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tpush\trbx\n\n\tsub\trsp,8\n\n$L$SEH_body_sgn0_pty_mod_384x::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[48+rdi]\n\tmov\tr9,QWORD PTR[56+rdi]\n\tmov\tr10,QWORD PTR[64+rdi]\n\tmov\tr11,QWORD PTR[72+rdi]\n\tmov\trcx,QWORD PTR[80+rdi]\n\tmov\trdx,QWORD PTR[88+rdi]\n\n\tmov\trbx,r8\n\tor\tr8,r9\n\tor\tr8,r10\n\tor\tr8,r11\n\tor\tr8,rcx\n\tor\tr8,rdx\n\n\tlea\trax,QWORD PTR[rdi]\n\txor\trdi,rdi\n\tmov\trbp,rbx\n\tadd\trbx,rbx\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trcx,rcx\n\tadc\trdx,rdx\n\tadc\trdi,0\n\n\tsub\trbx,QWORD PTR[rsi]\n\tsbb\tr9,QWORD PTR[8+rsi]\n\tsbb\tr10,QWORD PTR[16+rsi]\n\tsbb\tr11,QWORD PTR[24+rsi]\n\tsbb\trcx,QWORD 
PTR[32+rsi]\n\tsbb\trdx,QWORD PTR[40+rsi]\n\tsbb\trdi,0\n\n\tmov\tQWORD PTR[rsp],r8\n\tnot\trdi\n\tand\trbp,1\n\tand\trdi,2\n\tor\trdi,rbp\n\n\tmov\tr8,QWORD PTR[rax]\n\tmov\tr9,QWORD PTR[8+rax]\n\tmov\tr10,QWORD PTR[16+rax]\n\tmov\tr11,QWORD PTR[24+rax]\n\tmov\trcx,QWORD PTR[32+rax]\n\tmov\trdx,QWORD PTR[40+rax]\n\n\tmov\trbx,r8\n\tor\tr8,r9\n\tor\tr8,r10\n\tor\tr8,r11\n\tor\tr8,rcx\n\tor\tr8,rdx\n\n\txor\trax,rax\n\tmov\trbp,rbx\n\tadd\trbx,rbx\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trcx,rcx\n\tadc\trdx,rdx\n\tadc\trax,0\n\n\tsub\trbx,QWORD PTR[rsi]\n\tsbb\tr9,QWORD PTR[8+rsi]\n\tsbb\tr10,QWORD PTR[16+rsi]\n\tsbb\tr11,QWORD PTR[24+rsi]\n\tsbb\trcx,QWORD PTR[32+rsi]\n\tsbb\trdx,QWORD PTR[40+rsi]\n\tsbb\trax,0\n\n\tmov\trbx,QWORD PTR[rsp]\n\n\tnot\trax\n\n\ttest\tr8,r8\n\tcmovz\trbp,rdi\n\n\ttest\trbx,rbx\n\tcmovnz\trax,rdi\n\n\tand\trbp,1\n\tand\trax,2\n\tor\trax,rbp\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_sgn0_pty_mod_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0_pty_mod_384x::\nsgn0_pty_mod_384x\tENDP\nPUBLIC\tvec_select_32\n\n\nALIGN\t32\nvec_select_32\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[16+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD PTR[16+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[16+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-16))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((0+16-16))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-16)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tpand\txmm3,xmm5\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD 
PTR[(16-16)+rcx],xmm2\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_32\tENDP\nPUBLIC\tvec_select_48\n\n\nALIGN\t32\nvec_select_48\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[24+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD PTR[24+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[24+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-24))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((0+16-24))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-24)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((16+16-24))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((16+16-24))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(16-24)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tpand\txmm1,xmm5\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(32-24)+rcx],xmm0\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_48\tENDP\nPUBLIC\tvec_select_96\n\n\nALIGN\t32\nvec_select_96\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[48+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD PTR[48+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[48+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-48))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((0+16-48))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-48)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((16+16-48))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((16+16-48))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(16-48)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD 
PTR[((32+16-48))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((32+16-48))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(32-48)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((48+16-48))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((48+16-48))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(48-48)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((64+16-48))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((64+16-48))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(64-48)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tpand\txmm3,xmm5\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(80-48)+rcx],xmm2\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_96\tENDP\nPUBLIC\tvec_select_192\n\n\nALIGN\t32\nvec_select_192\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[96+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD PTR[96+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[96+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((0+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((16+16-96))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((16+16-96))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(16-96)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((32+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((32+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(32-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((48+16-96))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((48+16-96))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(48-96)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((64+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD 
PTR[((64+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(64-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((80+16-96))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((80+16-96))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(80-96)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((96+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((96+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(96-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((112+16-96))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((112+16-96))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(112-96)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((128+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((128+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(128-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((144+16-96))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((144+16-96))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(144-96)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((160+16-96))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((160+16-96))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(160-96)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tpand\txmm3,xmm5\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(176-96)+rcx],xmm2\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_192\tENDP\nPUBLIC\tvec_select_144\n\n\nALIGN\t32\nvec_select_144\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[72+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD PTR[72+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[72+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-72))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD 
PTR[((0+16-72))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-72)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((16+16-72))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((16+16-72))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(16-72)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((32+16-72))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((32+16-72))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(32-72)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((48+16-72))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((48+16-72))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(48-72)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((64+16-72))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((64+16-72))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(64-72)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((80+16-72))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((80+16-72))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(80-72)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((96+16-72))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((96+16-72))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(96-72)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((112+16-72))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((112+16-72))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(112-72)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tpand\txmm1,xmm5\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(128-72)+rcx],xmm0\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_144\tENDP\nPUBLIC\tvec_select_288\n\n\nALIGN\t32\nvec_select_288\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tmovd\txmm5,r9d\n\tpxor\txmm4,xmm4\n\tpshufd\txmm5,xmm5,0\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rdx]\n\tlea\trdx,QWORD PTR[144+rdx]\n\tpcmpeqd\txmm5,xmm4\n\tmovdqu\txmm1,XMMWORD PTR[r8]\n\tlea\tr8,QWORD 
PTR[144+r8]\n\tpcmpeqd\txmm4,xmm5\n\tlea\trcx,QWORD PTR[144+rcx]\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((0+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((0+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(0-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((16+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((16+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(16-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((32+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((32+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(32-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((48+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((48+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(48-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((64+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((64+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(64-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((80+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((80+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(80-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((96+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((96+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(96-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((112+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((112+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(112-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((128+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((128+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(128-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((144+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((144+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD 
PTR[(144-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((160+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((160+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(160-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((176+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((176+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(176-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((192+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((192+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(192-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((208+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((208+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(208-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((224+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((224+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(224-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tmovdqu\txmm0,XMMWORD PTR[((240+16-144))+rdx]\n\tpand\txmm3,xmm5\n\tmovdqu\txmm1,XMMWORD PTR[((240+16-144))+r8]\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(240-144)+rcx],xmm2\n\tpand\txmm0,xmm4\n\tmovdqu\txmm2,XMMWORD PTR[((256+16-144))+rdx]\n\tpand\txmm1,xmm5\n\tmovdqu\txmm3,XMMWORD PTR[((256+16-144))+r8]\n\tpor\txmm0,xmm1\n\tmovdqu\tXMMWORD PTR[(256-144)+rcx],xmm0\n\tpand\txmm2,xmm4\n\tpand\txmm3,xmm5\n\tpor\txmm2,xmm3\n\tmovdqu\tXMMWORD PTR[(272-144)+rcx],xmm2\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_select_288\tENDP\nPUBLIC\tvec_prefetch\n\n\nALIGN\t32\nvec_prefetch\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tlea\trdx,QWORD PTR[((-1))+rdx*1+rcx]\n\tmov\trax,64\n\txor\tr8,r8\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tcmova\trax,r8\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD 
PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tcmova\trax,r8\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tcmova\trax,r8\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tcmova\trax,r8\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tcmova\trax,r8\n\tprefetchnta\t[rcx]\n\tlea\trcx,QWORD PTR[rax*1+rcx]\n\tcmp\trcx,rdx\n\tcmova\trcx,rdx\n\tprefetchnta\t[rcx]\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_prefetch\tENDP\nPUBLIC\tvec_is_zero_16x\n\n\nALIGN\t32\nvec_is_zero_16x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tshr\tedx,4\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rcx]\n\tlea\trcx,QWORD PTR[16+rcx]\n\n$L$oop_is_zero::\n\tdec\tedx\n\tjz\t$L$oop_is_zero_done\n\tmovdqu\txmm1,XMMWORD PTR[rcx]\n\tlea\trcx,QWORD PTR[16+rcx]\n\tpor\txmm0,xmm1\n\tjmp\t$L$oop_is_zero\n\n$L$oop_is_zero_done::\n\tpshufd\txmm1,xmm0,04eh\n\tpor\txmm0,xmm1\nDB\t102,72,15,126,192\n\tinc\tedx\n\ttest\trax,rax\n\tcmovnz\teax,edx\n\txor\teax,1\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_is_zero_16x\tENDP\nPUBLIC\tvec_is_equal_16x\n\n\nALIGN\t32\nvec_is_equal_16x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\n\tshr\tr8d,4\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmovdqu\txmm0,XMMWORD PTR[rcx]\n\tmovdqu\txmm1,XMMWORD PTR[rdx]\n\tsub\trdx,rcx\n\tlea\trcx,QWORD PTR[16+rcx]\n\tpxor\txmm0,xmm1\n\n$L$oop_is_equal::\n\tdec\tr8d\n\tjz\t$L$oop_is_equal_done\n\tmovdqu\txmm1,XMMWORD PTR[rcx]\n\tmovdqu\txmm2,XMMWORD PTR[rdx*1+rcx]\n\tlea\trcx,QWORD 
PTR[16+rcx]\n\tpxor\txmm1,xmm2\n\tpor\txmm0,xmm1\n\tjmp\t$L$oop_is_equal\n\n$L$oop_is_equal_done::\n\tpshufd\txmm1,xmm0,04eh\n\tpor\txmm0,xmm1\nDB\t102,72,15,126,192\n\tinc\tr8d\n\ttest\trax,rax\n\tcmovnz\teax,r8d\n\txor\teax,1\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nvec_is_equal_16x\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_add_mod_384\n\tDD\timagerel $L$SEH_body_add_mod_384\n\tDD\timagerel $L$SEH_info_add_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_add_mod_384\n\tDD\timagerel $L$SEH_epilogue_add_mod_384\n\tDD\timagerel $L$SEH_info_add_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_add_mod_384\n\tDD\timagerel $L$SEH_end_add_mod_384\n\tDD\timagerel $L$SEH_info_add_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_add_mod_384x\n\tDD\timagerel $L$SEH_body_add_mod_384x\n\tDD\timagerel $L$SEH_info_add_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_add_mod_384x\n\tDD\timagerel $L$SEH_epilogue_add_mod_384x\n\tDD\timagerel $L$SEH_info_add_mod_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_add_mod_384x\n\tDD\timagerel $L$SEH_end_add_mod_384x\n\tDD\timagerel $L$SEH_info_add_mod_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_rshift_mod_384\n\tDD\timagerel $L$SEH_body_rshift_mod_384\n\tDD\timagerel $L$SEH_info_rshift_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_rshift_mod_384\n\tDD\timagerel $L$SEH_epilogue_rshift_mod_384\n\tDD\timagerel $L$SEH_info_rshift_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_rshift_mod_384\n\tDD\timagerel $L$SEH_end_rshift_mod_384\n\tDD\timagerel $L$SEH_info_rshift_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_div_by_2_mod_384\n\tDD\timagerel $L$SEH_body_div_by_2_mod_384\n\tDD\timagerel $L$SEH_info_div_by_2_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_div_by_2_mod_384\n\tDD\timagerel $L$SEH_epilogue_div_by_2_mod_384\n\tDD\timagerel $L$SEH_info_div_by_2_mod_384_body\n\n\tDD\timagerel 
$L$SEH_epilogue_div_by_2_mod_384\n\tDD\timagerel $L$SEH_end_div_by_2_mod_384\n\tDD\timagerel $L$SEH_info_div_by_2_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_lshift_mod_384\n\tDD\timagerel $L$SEH_body_lshift_mod_384\n\tDD\timagerel $L$SEH_info_lshift_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_lshift_mod_384\n\tDD\timagerel $L$SEH_epilogue_lshift_mod_384\n\tDD\timagerel $L$SEH_info_lshift_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_lshift_mod_384\n\tDD\timagerel $L$SEH_end_lshift_mod_384\n\tDD\timagerel $L$SEH_info_lshift_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_end_mul_by_3_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_body_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_epilogue_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_end_mul_by_8_mod_384\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_end_mul_by_3_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_3_mod_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_8_mod_384x\n\tDD\timagerel 
$L$SEH_body_mul_by_8_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_8_mod_384x\n\tDD\timagerel $L$SEH_epilogue_mul_by_8_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_by_8_mod_384x\n\tDD\timagerel $L$SEH_end_mul_by_8_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_8_mod_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_cneg_mod_384\n\tDD\timagerel $L$SEH_body_cneg_mod_384\n\tDD\timagerel $L$SEH_info_cneg_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_cneg_mod_384\n\tDD\timagerel $L$SEH_epilogue_cneg_mod_384\n\tDD\timagerel $L$SEH_info_cneg_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_cneg_mod_384\n\tDD\timagerel $L$SEH_end_cneg_mod_384\n\tDD\timagerel $L$SEH_info_cneg_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sub_mod_384\n\tDD\timagerel $L$SEH_body_sub_mod_384\n\tDD\timagerel $L$SEH_info_sub_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_sub_mod_384\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384\n\tDD\timagerel $L$SEH_info_sub_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384\n\tDD\timagerel $L$SEH_end_sub_mod_384\n\tDD\timagerel $L$SEH_info_sub_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sub_mod_384x\n\tDD\timagerel $L$SEH_body_sub_mod_384x\n\tDD\timagerel $L$SEH_info_sub_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sub_mod_384x\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384x\n\tDD\timagerel $L$SEH_info_sub_mod_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384x\n\tDD\timagerel $L$SEH_end_sub_mod_384x\n\tDD\timagerel $L$SEH_info_sub_mod_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_body_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_epilogue_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_body\n\n\tDD\timagerel 
$L$SEH_epilogue_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_end_mul_by_1_plus_i_mod_384x\n\tDD\timagerel $L$SEH_info_mul_by_1_plus_i_mod_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_body_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_end_sgn0_pty_mod_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_body_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_end_sgn0_pty_mod_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mod_384x_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY 
ALIGN(8)\nALIGN\t8\n$L$SEH_info_add_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_add_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_add_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_add_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_add_mod_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,003h,000h\nDB\t000h,0e4h,004h,000h\nDB\t000h,0d4h,005h,000h\nDB\t000h,0c4h,006h,000h\nDB\t000h,034h,007h,000h\nDB\t000h,054h,008h,000h\nDB\t000h,074h,00ah,000h\nDB\t000h,064h,00bh,000h\nDB\t000h,082h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_add_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_rshift_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_rshift_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_rshift_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_div_by_2_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_div_by_2_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000
h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_div_by_2_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_lshift_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_lshift_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_lshift_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_3_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_3_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_by_3_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_8_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_8_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_by_8_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\n
DB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_3_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_3_mod_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_by_3_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_8_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_8_mod_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_by_8_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_cneg_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_cneg_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_cneg_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sub_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sub_mod_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h
,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sub_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sub_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sub_mod_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,003h,000h\nDB\t000h,0e4h,004h,000h\nDB\t000h,0d4h,005h,000h\nDB\t000h,0c4h,006h,000h\nDB\t000h,034h,007h,000h\nDB\t000h,054h,008h,000h\nDB\t000h,074h,00ah,000h\nDB\t000h,064h,00bh,000h\nDB\t000h,082h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sub_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_by_1_plus_i_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_by_1_plus_i_mod_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,007h,000h\nDB\t000h,0e4h,008h,000h\nDB\t000h,0d4h,009h,000h\nDB\t000h,0c4h,00ah,000h\nDB\t000h,034h,00bh,000h\nDB\t000h,054h,00ch,000h\nDB\t000h,074h,00eh,000h\nDB\t000h,064h,00fh,000h\nDB\t000h,0c2h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_by_1_plus_i_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0_pty_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sgn0_pty_mod_384_body::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0_pty_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0_pty_mod_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sgn
0_pty_mod_384x_body::\nDB\t1,0,9,0\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0_pty_mod_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/add_mod_384x384-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tadd_mod_384x384\n\n\nALIGN\t32\nadd_mod_384x384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_add_mod_384x384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_add_mod_384x384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tadd\tr8,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tmov\trax,QWORD PTR[64+rsi]\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD PTR[72+rsi]\n\tadc\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tadc\tr12,QWORD PTR[32+rdx]\n\tmov\trsi,QWORD PTR[88+rsi]\n\tadc\tr13,QWORD PTR[40+rdx]\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr14,QWORD PTR[48+rdx]\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr15,QWORD PTR[56+rdx]\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\trax,QWORD PTR[64+rdx]\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tr8,r14\n\tadc\trbx,QWORD PTR[72+rdx]\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tr9,r15\n\tadc\trbp,QWORD PTR[80+rdx]\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tr10,rax\n\tadc\trsi,QWORD PTR[88+rdx]\n\tmov\tr11,rbx\n\tsbb\trdx,rdx\n\n\tsub\tr14,QWORD PTR[rcx]\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tmov\tr12,rbp\n\tsbb\trax,QWORD PTR[16+rcx]\n\tsbb\trbx,QWORD PTR[24+rcx]\n\tsbb\trbp,QWORD PTR[32+rcx]\n\tmov\tr13,rsi\n\tsbb\trsi,QWORD PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr14,r8\n\tcmovc\tr15,r9\n\tcmovc\trax,r10\n\tmov\tQWORD PTR[48+rdi],r14\n\tcmovc\trbx,r11\n\tmov\tQWORD PTR[56+rdi],r15\n\tcmovc\trbp,r12\n\tmov\tQWORD PTR[64+rdi],rax\n\tcmovc\trsi,r13\n\tmov\tQWORD PTR[72+rdi],rbx\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD 
PTR[88+rdi],rsi\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_add_mod_384x384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_add_mod_384x384::\nadd_mod_384x384\tENDP\n\nPUBLIC\tsub_mod_384x384\n\n\nALIGN\t32\nsub_mod_384x384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sub_mod_384x384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sub_mod_384x384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\trax,QWORD PTR[64+rsi]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD PTR[72+rsi]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trsi,QWORD PTR[88+rsi]\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tmov\tQWORD PTR[rdi],r8\n\tsbb\tr14,QWORD PTR[48+rdx]\n\tmov\tr8,QWORD PTR[rcx]\n\tmov\tQWORD PTR[8+rdi],r9\n\tsbb\tr15,QWORD PTR[56+rdx]\n\tmov\tr9,QWORD PTR[8+rcx]\n\tmov\tQWORD PTR[16+rdi],r10\n\tsbb\trax,QWORD PTR[64+rdx]\n\tmov\tr10,QWORD PTR[16+rcx]\n\tmov\tQWORD PTR[24+rdi],r11\n\tsbb\trbx,QWORD PTR[72+rdx]\n\tmov\tr11,QWORD PTR[24+rcx]\n\tmov\tQWORD PTR[32+rdi],r12\n\tsbb\trbp,QWORD PTR[80+rdx]\n\tmov\tr12,QWORD PTR[32+rcx]\n\tmov\tQWORD 
PTR[40+rdi],r13\n\tsbb\trsi,QWORD PTR[88+rdx]\n\tmov\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr8,rdx\n\tand\tr9,rdx\n\tand\tr10,rdx\n\tand\tr11,rdx\n\tand\tr12,rdx\n\tand\tr13,rdx\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\trax,r10\n\tmov\tQWORD PTR[56+rdi],r15\n\tadc\trbx,r11\n\tmov\tQWORD PTR[64+rdi],rax\n\tadc\trbp,r12\n\tmov\tQWORD PTR[72+rdi],rbx\n\tadc\trsi,r13\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD PTR[88+rdi],rsi\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sub_mod_384x384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sub_mod_384x384::\nsub_mod_384x384\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_add_mod_384x384\n\tDD\timagerel $L$SEH_body_add_mod_384x384\n\tDD\timagerel $L$SEH_info_add_mod_384x384_prologue\n\n\tDD\timagerel $L$SEH_body_add_mod_384x384\n\tDD\timagerel $L$SEH_epilogue_add_mod_384x384\n\tDD\timagerel $L$SEH_info_add_mod_384x384_body\n\n\tDD\timagerel $L$SEH_epilogue_add_mod_384x384\n\tDD\timagerel $L$SEH_end_add_mod_384x384\n\tDD\timagerel $L$SEH_info_add_mod_384x384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sub_mod_384x384\n\tDD\timagerel $L$SEH_body_sub_mod_384x384\n\tDD\timagerel $L$SEH_info_sub_mod_384x384_prologue\n\n\tDD\timagerel $L$SEH_body_sub_mod_384x384\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384x384\n\tDD\timagerel $L$SEH_info_sub_mod_384x384_body\n\n\tDD\timagerel $L$SEH_epilogue_sub_mod_384x384\n\tDD\timagerel $L$SEH_end_sub_mod_384x384\n\tDD\timagerel $L$SEH_info_sub_mod_384x384_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY 
ALIGN(8)\nALIGN\t8\n$L$SEH_info_add_mod_384x384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_add_mod_384x384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_add_mod_384x384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sub_mod_384x384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sub_mod_384x384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sub_mod_384x384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/blst.def",
    "content": "LIBRARY blst\n\nEXPORTS\n\tblst_scalar_from_uint32\n\tblst_uint32_from_scalar\n\tblst_scalar_from_uint64\n\tblst_uint64_from_scalar\n\tblst_scalar_from_bendian\n\tblst_bendian_from_scalar\n\tblst_scalar_from_lendian\n\tblst_lendian_from_scalar\n\tblst_scalar_fr_check\n\tblst_sk_check\n\tblst_sk_add_n_check\n\tblst_sk_sub_n_check\n\tblst_sk_mul_n_check\n\tblst_sk_inverse\n\tblst_scalar_from_le_bytes\n\tblst_scalar_from_be_bytes\n\tblst_fr_add\n\tblst_fr_sub\n\tblst_fr_mul_by_3\n\tblst_fr_lshift\n\tblst_fr_rshift\n\tblst_fr_mul\n\tblst_fr_sqr\n\tblst_fr_cneg\n\tblst_fr_eucl_inverse\n\tblst_fr_inverse\n\tblst_fr_from_uint64\n\tblst_uint64_from_fr\n\tblst_fr_from_scalar\n\tblst_scalar_from_fr\n\tblst_fp_add\n\tblst_fp_sub\n\tblst_fp_mul_by_3\n\tblst_fp_mul_by_8\n\tblst_fp_lshift\n\tblst_fp_mul\n\tblst_fp_sqr\n\tblst_fp_cneg\n\tblst_fp_eucl_inverse\n\tblst_fp_inverse\n\tblst_fp_sqrt\n\tblst_fp_from_uint32\n\tblst_uint32_from_fp\n\tblst_fp_from_uint64\n\tblst_uint64_from_fp\n\tblst_fp_from_bendian\n\tblst_bendian_from_fp\n\tblst_fp_from_lendian\n\tblst_lendian_from_fp\n\tblst_fp2_add\n\tblst_fp2_sub\n\tblst_fp2_mul_by_3\n\tblst_fp2_mul_by_8\n\tblst_fp2_lshift\n\tblst_fp2_mul\n\tblst_fp2_sqr\n\tblst_fp2_cneg\n\tblst_fp2_eucl_inverse\n\tblst_fp2_inverse\n\tblst_fp2_sqrt\n\tblst_fp12_sqr\n\tblst_fp12_cyclotomic_sqr\n\tblst_fp12_mul\n\tblst_fp12_mul_by_xy00z0\n\tblst_fp12_conjugate\n\tblst_fp12_inverse\n\tblst_fp12_frobenius_map\n\tblst_fp12_is_equal\n\tblst_fp12_is_one\n\tblst_fp12_in_group\n\tblst_fp12_one\n\tblst_p1_add\n\tblst_p1_add_or_double\n\tblst_p1_add_affine\n\tblst_p1_add_or_double_affine\n\tblst_p1_double\n\tblst_p1_mult\n\tblst_p1_cneg\n\tblst_p1_to_affine\n\tblst_p1_from_affine\n\tblst_p1_on_curve\n\tblst_p1_in_g1\n\tblst_p1_is_equal\n\tblst_p1_is_inf\n\tblst_p1_generator\n\tblst_p1_affine_on_curve\n\tblst_p1_affine_in_g1\n\tblst_p1_affine_is_equal\n\tblst_p1_affine_is_inf\n\tblst_p1_affine_generator\n\tblst_p2_add\n\tblst_p2_add_or_double\n\tbl
st_p2_add_affine\n\tblst_p2_add_or_double_affine\n\tblst_p2_double\n\tblst_p2_mult\n\tblst_p2_cneg\n\tblst_p2_to_affine\n\tblst_p2_from_affine\n\tblst_p2_on_curve\n\tblst_p2_in_g2\n\tblst_p2_is_equal\n\tblst_p2_is_inf\n\tblst_p2_generator\n\tblst_p2_affine_on_curve\n\tblst_p2_affine_in_g2\n\tblst_p2_affine_is_equal\n\tblst_p2_affine_is_inf\n\tblst_p2_affine_generator\n\tblst_p1s_to_affine\n\tblst_p1s_add\n\tblst_p1s_mult_wbits_precompute_sizeof\n\tblst_p1s_mult_wbits_precompute\n\tblst_p1s_mult_wbits_scratch_sizeof\n\tblst_p1s_mult_wbits\n\tblst_p1s_mult_pippenger_scratch_sizeof\n\tblst_p1s_mult_pippenger\n\tblst_p1s_tile_pippenger\n\tblst_p2s_to_affine\n\tblst_p2s_add\n\tblst_p2s_mult_wbits_precompute_sizeof\n\tblst_p2s_mult_wbits_precompute\n\tblst_p2s_mult_wbits_scratch_sizeof\n\tblst_p2s_mult_wbits\n\tblst_p2s_mult_pippenger_scratch_sizeof\n\tblst_p2s_mult_pippenger\n\tblst_p2s_tile_pippenger\n\tblst_map_to_g1\n\tblst_map_to_g2\n\tblst_encode_to_g1\n\tblst_hash_to_g1\n\tblst_encode_to_g2\n\tblst_hash_to_g2\n\tblst_p1_serialize\n\tblst_p1_compress\n\tblst_p1_affine_serialize\n\tblst_p1_affine_compress\n\tblst_p1_uncompress\n\tblst_p1_deserialize\n\tblst_p2_serialize\n\tblst_p2_compress\n\tblst_p2_affine_serialize\n\tblst_p2_affine_compress\n\tblst_p2_uncompress\n\tblst_p2_deserialize\n\tblst_keygen\n\tblst_sk_to_pk_in_g1\n\tblst_sign_pk_in_g1\n\tblst_sk_to_pk_in_g2\n\tblst_sign_pk_in_g2\n\tblst_miller_loop\n\tblst_miller_loop_n\n\tblst_final_exp\n\tblst_precompute_lines\n\tblst_miller_loop_lines\n\tblst_fp12_finalverify\n\tblst_pairing_sizeof\n\tblst_pairing_init\n\tblst_pairing_get_dst\n\tblst_pairing_commit\n\tblst_pairing_aggregate_pk_in_g2\n\tblst_pairing_chk_n_aggr_pk_in_g2\n\tblst_pairing_mul_n_aggregate_pk_in_g2\n\tblst_pairing_chk_n_mul_n_aggr_pk_in_g2\n\tblst_pairing_aggregate_pk_in_g1\n\tblst_pairing_chk_n_aggr_pk_in_g1\n\tblst_pairing_mul_n_aggregate_pk_in_g1\n\tblst_pairing_chk_n_mul_n_aggr_pk_in_g1\n\tblst_pairing_merge\n\tblst_pairing_finalverify\n\
tblst_aggregate_in_g1\n\tblst_aggregate_in_g2\n\tblst_aggregated_in_g1\n\tblst_aggregated_in_g2\n\tblst_core_verify_pk_in_g1\n\tblst_core_verify_pk_in_g2\n\tBLS12_381_G1\n\tBLS12_381_NEG_G1\n\tBLS12_381_G2\n\tBLS12_381_NEG_G2\n\tblst_fr_ct_bfly\n\tblst_fr_gs_bfly\n\tblst_fr_to\n\tblst_fr_from\n\tblst_fp_to\n\tblst_fp_from\n\tblst_fp_is_square\n\tblst_fp2_is_square\n\tblst_p1_from_jacobian\n\tblst_p2_from_jacobian\n\tblst_sk_to_pk2_in_g1\n\tblst_sign_pk2_in_g1\n\tblst_sk_to_pk2_in_g2\n\tblst_sign_pk2_in_g2\n\tblst_uniq_sizeof\n\tblst_uniq_init\n\tblst_uniq_test\n\tblst_expand_message_xmd\n\tblst_p1_unchecked_mult\n\tblst_p2_unchecked_mult\n\tblst_pairing_raw_aggregate\n\tblst_pairing_as_fp12\n\tblst_bendian_from_fp12\n\tblst_keygen_v3\n\tblst_keygen_v4_5\n\tblst_keygen_v5\n\tblst_derive_master_eip2333\n\tblst_derive_child_eip2333\n\tblst_scalar_from_hexascii\n\tblst_fr_from_hexascii\n\tblst_fp_from_hexascii\n\tblst_p1_sizeof\n\tblst_p1_affine_sizeof\n\tblst_p2_sizeof\n\tblst_p2_affine_sizeof\n\tblst_fp12_sizeof\n\tblst_fp_from_le_bytes\n\tblst_fp_from_be_bytes\n\tblst_sha256\n\n"
  },
  {
    "path": "build/win64/ct_inverse_mod_256-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|ct_inverse_mod_256|[FUNC]\n\tALIGN\t32\n|ct_inverse_mod_256| PROC\n\thint\t#25\n\tstp\tx29, x30, [sp,#-10*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1040\n\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx6, x7, [x1,#8*2]\n\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#16+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n else\n\tadd\tx1, sp, #16+511\n\tand\tx1, x1, #-512\n endif\n\tstr\tx0, [sp]\n\n\tldp\tx8, x9, [x2,#8*0]\n\tldp\tx10, x11, [x2,#8*2]\n\n\tstp\tx4, x5, [x1,#8*0]\n\tstp\tx6, x7, [x1,#8*2]\n\tstp\tx8, x9, [x1,#8*4]\n\tstp\tx10, x11, [x1,#8*6]\n\n\n\tbl\t|$Lab_approximation_31_256_loaded|\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12,[x0,#8*8]\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\tstr\tx12, [x0,#8*10]\n\n\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\tx8, [x1,#8*8]\n\tldr\tx9, [x1,#8*14]\n\tmadd\tx4, x16, x8, xzr\n\tmadd\tx4, x17, x9, x4\n\tasr\tx5, x4, #63\n\tstp\tx4, x5, [x0,#8*4]\n\tstp\tx5, x5, [x0,#8*6]\n\n\tmadd\tx4, x12, x8, xzr\n\tmadd\tx4, x13, x9, x4\n\tasr\tx5, x4, #63\n\tstp\tx4, x5, [x0,#8*10]\n\tstp\tx5, x5, [x0,#8*12]\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if 
:def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n 
endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstr\tx24, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tasr\tx24, x24, #63\n\tstp\tx24, x24, [x0,#8*4]\n\tstp\tx24, x24, [x0,#8*6]\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, 
#256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, 
x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tbl\t__ab_approximation_31_256\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\tx16, x12\n\tmov\tx17, x13\n\n\tmov\tx12, x14\n\tmov\tx13, x15\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256_n_shift_by_31\n\n\tadd\tx0,x0,#8*4\n\tbl\t__smul_256x63\n\tadc\tx22, x22, x23\n\tstr\tx22, [x0,#8*4]\n\tmov\tx16, x12\n\tmov\tx17, x13\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #47\n\n\tldr\tx7, [x1,#8*0]\n\tldr\tx11, [x1,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\tx16, x14\n\tmov\tx17, x15\n\tldr\tx0, [sp]\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx20, x7, x17\n\tldp\tx8, x9, [x3,#8*0]\n\tadc\tx23, x23, x25\n\tldp\tx10, x11, [x3,#8*2]\n\n\tadd\tx20, x20, x23\n\tasr\tx19, x20, #63\n\n\tand\tx23,   x8, x19\n\tand\tx24,   x9, x19\n\tadds\tx4, x4, x23\n\tand\tx25,   x10, x19\n\tadcs\tx5, x5, x24\n\tand\tx26,   x11, x19\n\tadcs\tx6, x6, x25\n\tadcs\tx7, x22,   x26\n\tadc\tx20, x20, xzr\n\n\tneg\tx19, x20\n\torr\tx20, x20, x19\n\tasr\tx19, x19, #63\n\n\tand\tx8, x8, x20\n\tand\tx9, x9, x20\n\tand\tx10, x10, x20\n\tand\tx11, x11, x20\n\n\teor\tx8, x8, x19\n\teor\tx9, x9, x19\n\tadds\tx8, x8, x19, lsr#63\n\teor\tx10, x10, x19\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x19\n\tadcs\tx10, x10, xzr\n\tadc\tx11, x11, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*4]\n\tadc\tx7, x7, x11\n\tstp\tx6, x7, [x0,#8*6]\n\n\tadd\tsp, sp, #1040\n\tldp\tx19, x20, 
[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#10*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tALIGN\t32\n|__smul_256x63| PROC\n\tldp\tx4, x5, [x1,#8*0+64]\n\tasr\tx14, x16, #63\n\tldp\tx6, x7, [x1,#8*2+64]\n\teor\tx16, x16, x14\n\tldr\tx22, [x1,#8*4+64]\n\n\teor\tx4, x4, x14\n\tsub\tx16, x16, x14\n\teor\tx5, x5, x14\n\tadds\tx4, x4, x14, lsr#63\n\teor\tx6, x6, x14\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\teor\tx22, x22, x14\n\tumulh\tx19, x4, x16\n\tadcs\tx7, x7, xzr\n\tumulh\tx20, x5, x16\n\tadcs\tx22, x22, xzr\n\tumulh\tx21, x6, x16\n\tmul\tx4, x4, x16\n\tcmp\tx16, #0\n\tmul\tx5, x5, x16\n\tcselne\tx22,x22,xzr\n\tmul\tx6, x6, x16\n\tadds\tx5, x5, x19\n\tmul\tx24, x7, x16\n\tadcs\tx6, x6, x20\n\tadcs\tx24, x24, x21\n\tadc\tx26, xzr, xzr\n\tldp\tx8, x9, [x1,#8*0+112]\n\tasr\tx14, x17, #63\n\tldp\tx10, x11, [x1,#8*2+112]\n\teor\tx17, x17, x14\n\tldr\tx23, [x1,#8*4+112]\n\n\teor\tx8, x8, x14\n\tsub\tx17, x17, x14\n\teor\tx9, x9, x14\n\tadds\tx8, x8, x14, lsr#63\n\teor\tx10, x10, x14\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x14\n\tadcs\tx10, x10, xzr\n\teor\tx23, x23, x14\n\tumulh\tx19, x8, x17\n\tadcs\tx11, x11, xzr\n\tumulh\tx20, x9, x17\n\tadcs\tx23, x23, xzr\n\tumulh\tx21, x10, x17\n\tadc\tx15, xzr, xzr\n\tmul\tx8, x8, x17\n\tcmp\tx17, #0\n\tmul\tx9, x9, x17\n\tcselne\tx23,x23,xzr\n\tmul\tx10, x10, x17\n\tadds\tx9, x9, x19\n\tmul\tx25, x11, x17\n\tadcs\tx10, x10, x20\n\tadcs\tx25, x25, x21\n\tadc\tx26, x26, xzr\n\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tstp\tx4, x5, [x0,#8*0]\n\tadcs\tx24,   x24,   x25\n\tstp\tx6, x24, [x0,#8*2]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__smul_512x63_tail| PROC\n\tumulh\tx24, x7, x16\n\tldr\tx5, [x1,#8*19]\n\tadc\tx26, x26, xzr\n\tldp\tx6, x7, [x1,#8*20]\n\tand\tx22, x22, x16\n\n\tumulh\tx11, x11, x17\n\n\tsub\tx24, x24, x22\n\tasr\tx25, x24, 
#63\n\n\teor\tx5, x5, x14\n\teor\tx6, x6, x14\n\tadds\tx5, x5, x15\n\teor\tx7, x7, x14\n\tadcs\tx6, x6, xzr\n\tumulh\tx19, x23,   x17\n\tadc\tx7, x7, xzr\n\tumulh\tx20, x5, x17\n\tadd\tx11, x11, x26\n\tumulh\tx21, x6, x17\n\n\tmul\tx4, x23,   x17\n\tmul\tx5, x5, x17\n\tadds\tx4, x4, x11\n\tmul\tx6, x6, x17\n\tadcs\tx5, x5, x19\n\tmul\tx22,   x7, x17\n\tadcs\tx6, x6, x20\n\tadcs\tx22,   x22,   x21\n\tadc\tx23, xzr, xzr\n\n\tadds\tx4, x4, x24\n\tadcs\tx5, x5, x25\n\tadcs\tx6, x6, x25\n\tstp\tx4, x5, [x0,#8*4]\n\tadcs\tx22,   x22,   x25\n\tstp\tx6, x22,   [x0,#8*6]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__smul_256_n_shift_by_31| PROC\n\tldp\tx4, x5, [x1,#8*0+0]\n\tasr\tx24, x12, #63\n\tldp\tx6, x7, [x1,#8*2+0]\n\teor\tx25, x12, x24\n\n\teor\tx4, x4, x24\n\tsub\tx25, x25, x24\n\teor\tx5, x5, x24\n\tadds\tx4, x4, x24, lsr#63\n\teor\tx6, x6, x24\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x24\n\tumulh\tx19, x4, x25\n\tadcs\tx6, x6, xzr\n\tumulh\tx20, x5, x25\n\tadc\tx7, x7, xzr\n\tumulh\tx21, x6, x25\n\tand\tx24, x24, x25\n\tumulh\tx22, x7, x25\n\tneg\tx24, x24\n\n\tmul\tx4, x4, x25\n\tmul\tx5, x5, x25\n\tmul\tx6, x6, x25\n\tadds\tx5, x5, x19\n\tmul\tx7, x7, x25\n\tadcs\tx6, x6, x20\n\tadcs\tx7, x7, x21\n\tadc\tx22, x22, x24\n\tldp\tx8, x9, [x1,#8*0+32]\n\tasr\tx24, x13, #63\n\tldp\tx10, x11, [x1,#8*2+32]\n\teor\tx25, x13, x24\n\n\teor\tx8, x8, x24\n\tsub\tx25, x25, x24\n\teor\tx9, x9, x24\n\tadds\tx8, x8, x24, lsr#63\n\teor\tx10, x10, x24\n\tadcs\tx9, x9, xzr\n\teor\tx11, x11, x24\n\tumulh\tx19, x8, x25\n\tadcs\tx10, x10, xzr\n\tumulh\tx20, x9, x25\n\tadc\tx11, x11, xzr\n\tumulh\tx21, x10, x25\n\tand\tx24, x24, x25\n\tumulh\tx23, x11, x25\n\tneg\tx24, x24\n\n\tmul\tx8, x8, x25\n\tmul\tx9, x9, x25\n\tmul\tx10, x10, x25\n\tadds\tx9, x9, x19\n\tmul\tx11, x11, x25\n\tadcs\tx10, x10, x20\n\tadcs\tx11, x11, x21\n\tadc\tx23, x23, x24\n\tadds\tx4, x4, x8\n\tadcs\tx5, x5, x9\n\tadcs\tx6, x6, x10\n\tadcs\tx7, x7, x11\n\tadc\tx8, x22,   x23\n\n\textr\tx4, x5, x4, #31\n\textr\tx5, x6, x5, 
#31\n\textr\tx6, x7, x6, #31\n\tasr\tx23, x8, #63\n\textr\tx7, x8, x7, #31\n\n\teor\tx4, x4, x23\n\teor\tx5, x5, x23\n\tadds\tx4, x4, x23, lsr#63\n\teor\tx6, x6, x23\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x23\n\tadcs\tx6, x6, xzr\n\tstp\tx4, x5, [x0,#8*0]\n\tadc\tx7, x7, xzr\n\tstp\tx6, x7, [x0,#8*2]\n\n\teor\tx12, x12, x23\n\teor\tx13, x13, x23\n\tsub\tx12, x12, x23\n\tsub\tx13, x13, x23\n\n\tret\n\tENDP\n\n\tALIGN\t16\n|__ab_approximation_31_256| PROC\n\tldp\tx6, x7, [x1,#8*2]\n\tldp\tx10, x11, [x1,#8*6]\n\tldp\tx4, x5, [x1,#8*0]\n\tldp\tx8, x9, [x1,#8*4]\n\n|$Lab_approximation_31_256_loaded|\n\torr\tx19, x7, x11\n\tcmp\tx19, #0\n\tcselne\tx7,x7,x6\n\tcselne\tx11,x11,x10\n\tcselne\tx6,x6,x5\n\torr\tx19, x7, x11\n\tcselne\tx10,x10,x9\n\n\tcmp\tx19, #0\n\tcselne\tx7,x7,x6\n\tcselne\tx11,x11,x10\n\tcselne\tx6,x6,x4\n\torr\tx19, x7, x11\n\tcselne\tx10,x10,x8\n\n\tclz\tx19, x19\n\tcmp\tx19, #64\n\tcselne\tx19,x19,xzr\n\tcselne\tx7,x7,x6\n\tcselne\tx11,x11,x10\n\tneg\tx20, x19\n\n\tlslv\tx7, x7, x19\n\tlslv\tx11, x11, x19\n\tlsrv\tx6, x6, x20\n\tlsrv\tx10, x10, x20\n\tand\tx6, x6, x20, asr#6\n\tand\tx10, x10, x20, asr#6\n\torr\tx7, x7, x6\n\torr\tx11, x11, x10\n\n\tbfxil\tx7, x4, #0, #31\n\tbfxil\tx11, x8, #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n\tENDP\n\n\n\tALIGN\t16\n|__inner_loop_31_256| PROC\n\tmov\tx2, #31\n\tmov\tx13, #0x7FFFFFFF80000000\n\tmov\tx15, #0x800000007FFFFFFF\n\tmov\tx23,#0x7FFFFFFF7FFFFFFF\n\n|$Loop_31_256|\n\tsbfx\tx22, x7, #0, #1\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\n\tsubs\tx21, x7, x19\n\tmov\tx19, x15\n\tcselhs\tx11,x11,x7\n\tcselhs\tx7,x21,x20\n\tcselhs\tx15,x15,x13\n\tcselhs\tx13,x13,x19\n\tlsr\tx7, x7, #1\n\tand\tx19, x15, x22\n\tand\tx20, x23, x22\n\tsub\tx13, x13, x19\n\tadd\tx15, x15, x15\n\tadd\tx13, x13, x20\n\tsub\tx15, x15, x23\n\tcbnz\tx2, |$Loop_31_256|\n\n\tmov\tx23, #0x7FFFFFFF\n\tubfx\tx12, x13, #0, #32\n\tubfx\tx13, x13, #32, #32\n\tubfx\tx14, x15, #0, #32\n\tubfx\tx15, x15, #32, #32\n\tsub\tx12, x12, 
x23\n\tsub\tx13, x13, x23\n\tsub\tx14, x14, x23\n\tsub\tx15, x15, x23\n\n\tret\n\tENDP\n\n\n\tALIGN\t16\n|__inner_loop_62_256| PROC\n\tmov\tx12, #1\n\tmov\tx13, #0\n\tmov\tx14, #0\n\tmov\tx15, #1\n\n|$Loop_62_256|\n\tsbfx\tx22, x7, #0, #1\n\tsub\tx2, x2, #1\n\tand\tx19, x11, x22\n\tsub\tx20, x11, x7\n\tsubs\tx21, x7, x19\n\tmov\tx19, x12\n\tcselhs\tx11,x11,x7\n\tcselhs\tx7,x21,x20\n\tmov\tx20, x13\n\tcselhs\tx12,x12,x14\n\tcselhs\tx14,x14,x19\n\tcselhs\tx13,x13,x15\n\tcselhs\tx15,x15,x20\n\tlsr\tx7, x7, #1\n\tand\tx19, x14, x22\n\tand\tx20, x15, x22\n\tadd\tx14, x14, x14\n\tadd\tx15, x15, x15\n\tsub\tx12, x12, x19\n\tsub\tx13, x13, x20\n\tcbnz\tx2, |$Loop_62_256|\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/ct_inverse_mod_256-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tct_inverse_mod_256\n\n\nALIGN\t32\nct_inverse_mod_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_ct_inverse_mod_256::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,1072\n\n$L$SEH_body_ct_inverse_mod_256::\n\n\n\tlea\trax,QWORD PTR[((48+511))+rsp]\n\tand\trax,-512\n\tmov\tQWORD PTR[32+rsp],rdi\n\tmov\tQWORD PTR[40+rsp],rcx\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\n\tmov\tr12,QWORD PTR[rdx]\n\tmov\tr13,QWORD PTR[8+rdx]\n\tmov\tr14,QWORD PTR[16+rdx]\n\tmov\tr15,QWORD PTR[24+rdx]\n\n\tmov\tQWORD PTR[rax],r8\n\tmov\tQWORD PTR[8+rax],r9\n\tmov\tQWORD PTR[16+rax],r10\n\tmov\tQWORD PTR[24+rax],r11\n\n\tmov\tQWORD PTR[32+rax],r12\n\tmov\tQWORD PTR[40+rax],r13\n\tmov\tQWORD PTR[48+rax],r14\n\tmov\tQWORD PTR[56+rax],r15\n\tmov\trsi,rax\n\n\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmov\tQWORD PTR[64+rdi],rdx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\tmov\tQWORD PTR[72+rdi],rdx\n\n\n\txor\trsi,256\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\n\n\n\tmov\tr8,QWORD PTR[64+rsi]\n\tmov\tr12,QWORD 
PTR[104+rsi]\n\tmov\tr9,r8\n\timul\tr8,QWORD PTR[rsp]\n\tmov\tr13,r12\n\timul\tr12,QWORD PTR[8+rsp]\n\tadd\tr8,r12\n\tmov\tQWORD PTR[32+rdi],r8\n\tsar\tr8,63\n\tmov\tQWORD PTR[40+rdi],r8\n\tmov\tQWORD PTR[48+rdi],r8\n\tmov\tQWORD PTR[56+rdi],r8\n\tmov\tQWORD PTR[64+rdi],r8\n\tlea\trsi,QWORD PTR[64+rsi]\n\n\timul\tr9,rdx\n\timul\tr13,rcx\n\tadd\tr9,r13\n\tmov\tQWORD PTR[72+rdi],r9\n\tsar\tr9,63\n\tmov\tQWORD PTR[80+rdi],r9\n\tmov\tQWORD PTR[88+rdi],r9\n\tmov\tQWORD PTR[96+rdi],r9\n\tmov\tQWORD PTR[104+rdi],r9\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD 
PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD 
PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_256x63\n\tsar\trbp,63\n\tmov\tQWORD PTR[40+rdi],rbp\n\tmov\tQWORD PTR[48+rdi],rbp\n\tmov\tQWORD PTR[56+rdi],rbp\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD 
PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_512x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_512x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_512x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD 
PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_512x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[40+rdi]\n\tcall\t__smulq_512x63\n\txor\trsi,256+8*8\n\tmov\tedx,31\n\tcall\t__ab_approximation_31_256\n\n\n\tmov\tQWORD PTR[16+rsp],r12\n\tmov\tQWORD PTR[24+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[64+rsi]\n\tlea\trdi,QWORD PTR[32+rdi]\n\tcall\t__smulq_256x63\n\n\tmov\trdx,QWORD PTR[16+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tlea\trdi,QWORD 
PTR[40+rdi]\n\tcall\t__smulq_512x63\n\n\txor\trsi,256+8*8\n\tmov\tedx,47\n\n\tmov\tr8,QWORD PTR[rsi]\n\n\tmov\tr10,QWORD PTR[32+rsi]\n\n\tcall\t__inner_loop_62_256\n\n\n\n\n\n\n\n\tlea\trsi,QWORD PTR[64+rsi]\n\n\n\n\n\n\tmov\trdx,r12\n\tmov\trcx,r13\n\tmov\trdi,QWORD PTR[32+rsp]\n\tcall\t__smulq_512x63\n\tadc\trdx,rbp\n\n\tmov\trsi,QWORD PTR[40+rsp]\n\tmov\trax,rdx\n\tsar\trdx,63\n\n\tmov\tr8,rdx\n\tmov\tr9,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tand\tr8,QWORD PTR[rsi]\n\tmov\tr10,rdx\n\tand\tr9,QWORD PTR[8+rsi]\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\trdx,QWORD PTR[24+rsi]\n\n\tadd\tr12,r8\n\tadc\tr13,r9\n\tadc\tr14,r10\n\tadc\tr15,rdx\n\tadc\trax,0\n\n\tmov\trdx,rax\n\tneg\trax\n\tor\trdx,rax\n\tsar\trax,63\n\n\tmov\tr8,rdx\n\tmov\tr9,rdx\n\tand\tr8,QWORD PTR[rsi]\n\tmov\tr10,rdx\n\tand\tr9,QWORD PTR[8+rsi]\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\trdx,QWORD PTR[24+rsi]\n\n\txor\tr8,rax\n\txor\trcx,rcx\n\txor\tr9,rax\n\tsub\trcx,rax\n\txor\tr10,rax\n\txor\trdx,rax\n\tadd\tr8,rcx\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\trdx,0\n\n\tadd\tr12,r8\n\tadc\tr13,r9\n\tadc\tr14,r10\n\tadc\tr15,rdx\n\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\n\tlea\tr8,QWORD PTR[1072+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_ct_inverse_mod_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_ct_inverse_mod_256::\nct_inverse_mod_256\tENDP\n\nALIGN\t32\n__smulq_512x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\trbp,QWORD 
PTR[32+rsi]\n\n\tmov\trbx,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbx,rdx\n\tadd\trbx,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\trbp,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\trbp,0\n\n\tmul\trbx\n\tmov\tQWORD PTR[rdi],rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trbx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tr10,rdx\n\tmul\trbx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tr11,rdx\n\tand\trbp,rbx\n\tneg\trbp\n\tmul\trbx\n\tadd\tr11,rax\n\tadc\trbp,rdx\n\tmov\tQWORD PTR[24+rdi],r11\n\n\tmov\tr8,QWORD PTR[40+rsi]\n\tmov\tr9,QWORD PTR[48+rsi]\n\tmov\tr10,QWORD PTR[56+rsi]\n\tmov\tr11,QWORD PTR[64+rsi]\n\tmov\tr12,QWORD PTR[72+rsi]\n\tmov\tr13,QWORD PTR[80+rsi]\n\tmov\tr14,QWORD PTR[88+rsi]\n\tmov\tr15,QWORD PTR[96+rsi]\n\n\tmov\trdx,rcx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trcx,rdx\n\tadd\trcx,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\txor\tr15,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\tadc\tr15,0\n\n\tmul\trcx\n\tmov\tr8,rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trcx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trcx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trcx\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trcx\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trcx\n\tadd\tr13,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\tmul\trcx\n\tadd\tr14,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\timul\trcx\n\tadd\tr15,rax\n\tadc\trdx,0\n\n\tmov\trbx,rbp\n\tsar\trbp,63\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,rbx\n\tadc\tr13,rbp\n\tadc\tr14,rbp\n\tadc\tr15,rbp\n\n\tmov\tQWORD 
PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_512x63\tENDP\n\n\nALIGN\t32\n__smulq_256x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[((0+0))+rsi]\n\tmov\tr9,QWORD PTR[((0+8))+rsi]\n\tmov\tr10,QWORD PTR[((0+16))+rsi]\n\tmov\tr11,QWORD PTR[((0+24))+rsi]\n\tmov\trbp,QWORD PTR[((0+32))+rsi]\n\n\tmov\trbx,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbx,rdx\n\tadd\trbx,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\trbp,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\trbp,0\n\n\tmul\trbx\n\tmov\tr8,rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trbx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tand\trbp,rbx\n\tneg\trbp\n\tmul\trbx\n\tadd\tr11,rax\n\tadc\trbp,rdx\n\tmov\trdx,rcx\n\tmov\tr12,QWORD PTR[((40+0))+rsi]\n\tmov\tr13,QWORD PTR[((40+8))+rsi]\n\tmov\tr14,QWORD PTR[((40+16))+rsi]\n\tmov\tr15,QWORD PTR[((40+24))+rsi]\n\tmov\trcx,QWORD PTR[((40+32))+rsi]\n\n\tmov\trbx,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbx,rdx\n\tadd\trbx,rax\n\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\txor\tr15,rdx\n\txor\trcx,rdx\n\tadd\trax,r12\n\tadc\tr13,0\n\tadc\tr14,0\n\tadc\tr15,0\n\tadc\trcx,0\n\n\tmul\trbx\n\tmov\tr12,rax\n\tmov\trax,r13\n\tmov\tr13,rdx\n\tmul\trbx\n\tadd\tr13,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\tmul\trbx\n\tadd\tr14,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\tand\trcx,rbx\n\tneg\trcx\n\tmul\trbx\n\tadd\tr15,rax\n\tadc\trcx,rdx\n\tadd\tr8,r12\n\tadc\tr9,r13\n\tadc\tr10,r14\n\tadc\tr11,r15\n\tadc\trbp,rcx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD 
PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_256x63\tENDP\n\nALIGN\t32\n__smulq_256_n_shift_by_31\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tQWORD PTR[rdi],rdx\n\tmov\tQWORD PTR[8+rdi],rcx\n\tmov\trbp,rdx\n\tmov\tr8,QWORD PTR[((0+0))+rsi]\n\tmov\tr9,QWORD PTR[((0+8))+rsi]\n\tmov\tr10,QWORD PTR[((0+16))+rsi]\n\tmov\tr11,QWORD PTR[((0+24))+rsi]\n\n\tmov\trbx,rbp\n\tsar\trbp,63\n\txor\trax,rax\n\tsub\trax,rbp\n\n\txor\trbx,rbp\n\tadd\trbx,rax\n\n\txor\tr8,rbp\n\txor\tr9,rbp\n\txor\tr10,rbp\n\txor\tr11,rbp\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\n\tmul\trbx\n\tmov\tr8,rax\n\tmov\trax,r9\n\tand\trbp,rbx\n\tneg\trbp\n\tmov\tr9,rdx\n\tmul\trbx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbx\n\tadd\tr11,rax\n\tadc\trbp,rdx\n\tmov\tr12,QWORD PTR[((32+0))+rsi]\n\tmov\tr13,QWORD PTR[((32+8))+rsi]\n\tmov\tr14,QWORD PTR[((32+16))+rsi]\n\tmov\tr15,QWORD PTR[((32+24))+rsi]\n\n\tmov\trbx,rcx\n\tsar\trcx,63\n\txor\trax,rax\n\tsub\trax,rcx\n\n\txor\trbx,rcx\n\tadd\trbx,rax\n\n\txor\tr12,rcx\n\txor\tr13,rcx\n\txor\tr14,rcx\n\txor\tr15,rcx\n\tadd\trax,r12\n\tadc\tr13,0\n\tadc\tr14,0\n\tadc\tr15,0\n\n\tmul\trbx\n\tmov\tr12,rax\n\tmov\trax,r13\n\tand\trcx,rbx\n\tneg\trcx\n\tmov\tr13,rdx\n\tmul\trbx\n\tadd\tr13,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\tmul\trbx\n\tadd\tr14,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\tmul\trbx\n\tadd\tr15,rax\n\tadc\trcx,rdx\n\tadd\tr8,r12\n\tadc\tr9,r13\n\tadc\tr10,r14\n\tadc\tr11,r15\n\tadc\trbp,rcx\n\n\tmov\trdx,QWORD PTR[rdi]\n\tmov\trcx,QWORD 
PTR[8+rdi]\n\n\tshrd\tr8,r9,31\n\tshrd\tr9,r10,31\n\tshrd\tr10,r11,31\n\tshrd\tr11,rbp,31\n\n\tsar\trbp,63\n\txor\trax,rax\n\tsub\trax,rbp\n\n\txor\tr8,rbp\n\txor\tr9,rbp\n\txor\tr10,rbp\n\txor\tr11,rbp\n\tadd\tr8,rax\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\txor\trdx,rbp\n\txor\trcx,rbp\n\tadd\trdx,rax\n\tadd\trcx,rax\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_256_n_shift_by_31\tENDP\n\nALIGN\t32\n__ab_approximation_31_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr9,QWORD PTR[24+rsi]\n\tmov\tr11,QWORD PTR[56+rsi]\n\tmov\trbx,QWORD PTR[16+rsi]\n\tmov\trbp,QWORD PTR[48+rsi]\n\tmov\tr8,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[40+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tmov\tr8,QWORD PTR[rsi]\n\tcmovz\trbp,r10\n\tmov\tr10,QWORD PTR[32+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tcmovz\trbp,r10\n\n\tmov\trax,r9\n\tor\trax,r11\n\tbsr\trcx,rax\n\tlea\trcx,QWORD PTR[1+rcx]\n\tcmovz\tr9,r8\n\tcmovz\tr11,r10\n\tcmovz\trcx,rax\n\tneg\trcx\n\n\n\tshld\tr9,rbx,cl\n\tshld\tr11,rbp,cl\n\n\tmov\teax,07FFFFFFFh\n\tand\tr8,rax\n\tand\tr10,rax\n\tnot\trax\n\tand\tr9,rax\n\tand\tr11,rax\n\tor\tr8,r9\n\tor\tr10,r11\n\n\tjmp\t__inner_loop_31_256\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__ab_approximation_31_256\tENDP\n\nALIGN\t32\n__inner_loop_31_256\tPROC 
PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trcx,07FFFFFFF80000000h\n\tmov\tr13,0800000007FFFFFFFh\n\tmov\tr15,07FFFFFFF7FFFFFFFh\n\n$L$oop_31_256::\n\tcmp\tr8,r10\n\tmov\trax,r8\n\tmov\trbx,r10\n\tmov\trbp,rcx\n\tmov\tr14,r13\n\tcmovb\tr8,r10\n\tcmovb\tr10,rax\n\tcmovb\trcx,r13\n\tcmovb\tr13,rbp\n\n\tsub\tr8,r10\n\tsub\trcx,r13\n\tadd\trcx,r15\n\n\ttest\trax,1\n\tcmovz\tr8,rax\n\tcmovz\tr10,rbx\n\tcmovz\trcx,rbp\n\tcmovz\tr13,r14\n\n\tshr\tr8,1\n\tadd\tr13,r13\n\tsub\tr13,r15\n\tsub\tedx,1\n\tjnz\t$L$oop_31_256\n\n\tshr\tr15,32\n\tmov\tedx,ecx\n\tmov\tr12d,r13d\n\tshr\trcx,32\n\tshr\tr13,32\n\tsub\trdx,r15\n\tsub\trcx,r15\n\tsub\tr12,r15\n\tsub\tr13,r15\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_31_256\tENDP\n\n\nALIGN\t32\n__inner_loop_62_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr15d,edx\n\tmov\trdx,1\n\txor\trcx,rcx\n\txor\tr12,r12\n\tmov\tr13,rdx\n\tmov\tr14,rdx\n\n$L$oop_62_256::\n\txor\trax,rax\n\ttest\tr8,r14\n\tmov\trbx,r10\n\tcmovnz\trax,r10\n\tsub\trbx,r8\n\tmov\trbp,r8\n\tsub\tr8,rax\n\tcmovc\tr8,rbx\n\tcmovc\tr10,rbp\n\tmov\trax,rdx\n\tcmovc\trdx,r12\n\tcmovc\tr12,rax\n\tmov\trbx,rcx\n\tcmovc\trcx,r13\n\tcmovc\tr13,rbx\n\txor\trax,rax\n\txor\trbx,rbx\n\tshr\tr8,1\n\ttest\trbp,r14\n\tcmovnz\trax,r12\n\tcmovnz\trbx,r13\n\tadd\tr12,r12\n\tadd\tr13,r13\n\tsub\trdx,rax\n\tsub\trcx,rbx\n\tsub\tr15d,1\n\tjnz\t$L$oop_62_256\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_62_256\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_ct_inverse_mod_256\n\tDD\timagerel $L$SEH_body_ct_inverse_mod_256\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_256_prologue\n\n\tDD\timagerel $L$SEH_body_ct_inverse_mod_256\n\tDD\timagerel $L$SEH_epilogue_ct_inverse_mod_256\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_256_body\n\n\tDD\timagerel $L$SEH_epilogue_ct_inverse_mod_256\n\tDD\timagerel 
$L$SEH_end_ct_inverse_mod_256\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_256_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_ct_inverse_mod_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_ct_inverse_mod_256_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,086h,000h\nDB\t000h,0e4h,087h,000h\nDB\t000h,0d4h,088h,000h\nDB\t000h,0c4h,089h,000h\nDB\t000h,034h,08ah,000h\nDB\t000h,054h,08bh,000h\nDB\t000h,074h,08dh,000h\nDB\t000h,064h,08eh,000h\nDB\t000h,001h,08ch,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_ct_inverse_mod_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/ct_inverse_mod_384-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|ct_inverse_mod_384|[FUNC]\n\tALIGN\t32\n|ct_inverse_mod_384| PROC\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #1056\n\n\tldp\tx22,   x4, [x1,#8*0]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx7, x8, [x1,#8*4]\n\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tadd\tx1,sp,#32+511\n\talignd\tc1,c1,#9\n\tscbnds\tc1,c1,#512\n else\n\tadd\tx1, sp, #32+511\n\tand\tx1, x1, #-512\n endif\n\tstp\tx0, x3, [sp]\n\n\tldp\tx9, x10, [x2,#8*0]\n\tldp\tx11, x12, [x2,#8*2]\n\tldp\tx13, x14, [x2,#8*4]\n\n\tstp\tx22,   x4, [x1,#8*0]\n\tstp\tx5, x6, [x1,#8*2]\n\tstp\tx7, x8, [x1,#8*4]\n\tstp\tx9, x10, [x1,#8*6]\n\tstp\tx11, x12, [x1,#8*8]\n\tstp\tx13, x14, [x1,#8*10]\n\n\n\tmov\tx2, #62\n\tbl\t|$Lab_approximation_62_loaded|\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15,[x0,#8*12]\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\tstr\tx15, [x0,#8*14]\n\n\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\tx7, [x1,#8*12]\n\tldr\tx8, [x1,#8*20]\n\tmul\tx3, x20, x7\n\tsmulh\tx4, x20, x7\n\tmul\tx5, x21, x8\n\tsmulh\tx6, x21, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*6]\n\tasr\tx5, x4, #63\n\tstp\tx5, x5, [x0,#8*8]\n\tstp\tx5, x5, 
[x0,#8*10]\n\n\tmul\tx3, x15, x7\n\tsmulh\tx4, x15, x7\n\tmul\tx5, x16, x8\n\tsmulh\tx6, x16, x8\n\tadds\tx3, x3, x5\n\tadc\tx4, x4, x6\n\tstp\tx3, x4, [x0,#8*14]\n\tasr\tx5, x4, #63\n\tstp\tx5, x5, [x0,#8*16]\n\tstp\tx5, x5, [x0,#8*18]\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, 
x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstr\tx27, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tasr\tx27, x27, #63\n\tstp\tx27, x27, [x0,#8*6]\n\tstp\tx27, x27, [x0,#8*8]\n\tstp\tx27, x27, [x0,#8*10]\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, 
x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\tbl\t__ab_approximation_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\tx20, x15\n\tmov\tx21, x16\n\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384_n_shift_by_62\n\n\tadd\tx0,x0,#8*6\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #62\n\n\tldp\tx3, x8, [x1,#8*0]\n\tldp\tx9, x14, [x1,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\tx0, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,c1,x0\n endif\n\tstr\tx3, [x0,#8*0]\n\tstr\tx9, [x0,#8*6]\n\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tmov\tx15, x17\n\tmov\tx16, x19\n\tadd\tx0,x0,#8*12\n\tbl\t__smul_384x63\n\tadc\tx25, x25, x26\n\tstr\tx25, [x0,#8*6]\n\n\tmov\tx20, x15\n\tmov\tx21, x16\n\tadd\tx0,x0,#8*8\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\n\teor\tx1, x1, #256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,c0,x1\n endif\n\tmov\tx2, #24\n\n\tldr\tx3, [x1,#8*0]\n\teor\tx8, x8, x8\n\tldr\tx9, [x1,#8*6]\n\teor\tx14, x14, 
x14\n\tbl\t__inner_loop_62\n\n\tmov\tx20, x17\n\tmov\tx21, x19\n\tldp\tx0, x15, [sp]\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tsmulh\tx23, x8, x21\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x15,#8*0]\n\tadd\tx23, x23, x26\n\tldp\tx11, x12, [x15,#8*2]\n\tasr\tx22, x23, #63\n\tldp\tx13, x14, [x15,#8*4]\n\n\tand\tx26,   x9, x22\n\tand\tx27,   x10, x22\n\tadds\tx3, x3, x26\n\tand\tx28,   x11, x22\n\tadcs\tx4, x4, x27\n\tand\tx2,   x12, x22\n\tadcs\tx5, x5, x28\n\tand\tx26,   x13, x22\n\tadcs\tx6, x6, x2\n\tand\tx27,   x14, x22\n\tadcs\tx7, x7, x26\n\tadcs\tx8, x25,   x27\n\tadc\tx23, x23, xzr\n\n\tneg\tx22, x23\n\torr\tx23, x23, x22\n\tasr\tx22, x22, #63\n\n\tand\tx9, x9, x23\n\tand\tx10, x10, x23\n\tand\tx11, x11, x23\n\tand\tx12, x12, x23\n\tand\tx13, x13, x23\n\tand\tx14, x14, x23\n\n\teor\tx9,  x9, x22\n\teor\tx10,  x10, x22\n\tadds\tx9,  x9, x22, lsr#63\n\teor\tx11,  x11, x22\n\tadcs\tx10,  x10, xzr\n\teor\tx12,  x12, x22\n\tadcs\tx11,  x11, xzr\n\teor\tx13, x13, x22\n\tadcs\tx12,  x12, xzr\n\teor\tx14, x14, x22\n\tadcs\tx13, x13, xzr\n\tadc\tx14, x14, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*8]\n\tadc\tx8, x8, x14\n\tstp\tx7, x8, [x0,#8*10]\n\n\tadd\tsp, sp, #1056\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, [sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\n\tALIGN\t32\n|__smul_384x63| PROC\n\tldp\tx3, x4, [x1,#8*0+96]\n\tasr\tx17, x20, #63\n\tldp\tx5, x6, [x1,#8*2+96]\n\teor\tx20, x20, x17\n\tldp\tx7, x8, [x1,#8*4+96]\n\n\teor\tx3, x3, x17\n\tldr\tx25, [x1,#8*6+96]\n\tsub\tx20, x20, x17\n\teor\tx4, x4, x17\n\tadds\tx3, x3, x17, lsr#63\n\teor\tx5, x5, x17\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, 
x17\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x3, x20\n\teor\tx8, x8, x17\n\tumulh\tx23, x4, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x5, x20\n\teor\tx25, x25, x17\n\tmul\tx3, x3, x20\n\tadcs\tx8, x8, xzr\n\tmul\tx4, x4, x20\n\tadcs\tx25, x25, xzr\n\tcmp\tx20, #0\n\tmul\tx5, x5, x20\n\tcselne\tx25,x25,xzr\n\tadds\tx4, x4, x22\n\tumulh\tx22, x6, x20\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x7, x20\n\tmul\tx6, x6, x20\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x24\n\tmul\tx27,x8, x20\n\tadcs\tx7, x7, x22\n\tadcs\tx27,x27,x23\n\tadc\tx2, xzr, xzr\n\tldp\tx9, x10, [x1,#8*0+160]\n\tasr\tx17, x21, #63\n\tldp\tx11, x12, [x1,#8*2+160]\n\teor\tx21, x21, x17\n\tldp\tx13, x14, [x1,#8*4+160]\n\n\teor\tx9, x9, x17\n\tldr\tx26, [x1,#8*6+160]\n\tsub\tx21, x21, x17\n\teor\tx10, x10, x17\n\tadds\tx9, x9, x17, lsr#63\n\teor\tx11, x11, x17\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x17\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x17\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x9, x21\n\teor\tx14, x14, x17\n\tumulh\tx23, x10, x21\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x11, x21\n\teor\tx26, x26, x17\n\tmul\tx9, x9, x21\n\tadcs\tx14, x14, xzr\n\tmul\tx10, x10, x21\n\tadcs\tx26, x26, xzr\n\tadc\tx19, xzr, xzr\n\tcmp\tx21, #0\n\tmul\tx11, x11, x21\n\tcselne\tx26,x26,xzr\n\tadds\tx10, x10, x22\n\tumulh\tx22, x12, x21\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x13, x21\n\tmul\tx12, x12, x21\n\tmul\tx13, x13, x21\n\tadcs\tx12, x12, x24\n\tmul\tx28,x14, x21\n\tadcs\tx13, x13, x22\n\tadcs\tx28,x28,x23\n\tadc\tx2, x2, xzr\n\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, x13\n\tstp\tx5, x6, [x0,#8*2]\n\tadcs\tx27,   x27,   x28\n\tstp\tx7, x27,   [x0,#8*4]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__smul_768x63_tail| PROC\n\tumulh\tx27, x8, x20\n\tldr\tx4, [x1,#8*27]\n\tadc\tx2, x2, xzr\n\tldp\tx5, x6, [x1,#8*28]\n\tand\tx25, x25, x20\n\tldp\tx7, x8, [x1,#8*30]\n\tsub\tx27, x27, x25\n\n\tumulh\tx14, x14, 
x21\n\teor\tx4, x4, x17\n\teor\tx5, x5, x17\n\teor\tx6, x6, x17\n\tadds\tx4, x4, x19\n\teor\tx7, x7, x17\n\tadcs\tx5, x5, xzr\n\teor\tx8, x8, x17\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x26,   x21\n\tadcs\tx7, x7, xzr\n\tumulh\tx23, x4, x21\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x21\n\tadd\tx14, x14, x2\n\tumulh\tx25, x6, x21\n\tasr\tx28, x27, #63\n\tumulh\tx2, x7, x21\n\tmul\tx3, x26,   x21\n\tmul\tx4, x4, x21\n\tmul\tx5, x5, x21\n\tadds\tx3, x3, x14\n\tmul\tx6, x6, x21\n\tadcs\tx4, x4, x22\n\tmul\tx7, x7, x21\n\tadcs\tx5, x5, x23\n\tmul\tx22,   x8, x21\n\tadcs\tx6, x6, x24\n\tadcs\tx7, x7, x25\n\tadcs\tx25,   x22, x2\n\tadc\tx26, xzr, xzr\n\n\tadds\tx3, x3, x27\n\tadcs\tx4, x4, x28\n\tadcs\tx5, x5, x28\n\tadcs\tx6, x6, x28\n\tstp\tx3, x4, [x0,#8*6]\n\tadcs\tx7, x7, x28\n\tstp\tx5, x6, [x0,#8*8]\n\tadcs\tx25,   x25,   x28\n\tstp\tx7, x25,   [x0,#8*10]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__smul_384_n_shift_by_62| PROC\n\tldp\tx3, x4, [x1,#8*0+0]\n\tasr\tx28, x15, #63\n\tldp\tx5, x6, [x1,#8*2+0]\n\teor\tx2, x15, x28\n\tldp\tx7, x8, [x1,#8*4+0]\n\n\teor\tx3, x3, x28\n\tsub\tx2, x2, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tumulh\tx22, x3, x2\n\tadcs\tx6, x6, xzr\n\tumulh\tx23, x4, x2\n\teor\tx8, x8, x28\n\tmul\tx3, x3, x2\n\tadcs\tx7, x7, xzr\n\tmul\tx4, x4, x2\n\tadc\tx8, x8, xzr\n\n\tumulh\tx24, x5, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x6, x2\n\tadds\tx4, x4, x22\n\tmul\tx5, x5, x2\n\tumulh\tx22, x7, x2\n\tneg\tx28, x28\n\tmul\tx6, x6, x2\n\tadcs\tx5, x5, x23\n\tumulh\tx23, x8, x2\n\tmul\tx7, x7, x2\n\tadcs\tx6, x6, x24\n\tmul\tx8, x8, x2\n\tadcs\tx7, x7, x25\n\tadcs\tx8, x8, x22\n\tadc\tx27, x23, x28\n\tldp\tx9, x10, [x1,#8*0+48]\n\tasr\tx28, x16, #63\n\tldp\tx11, x12, [x1,#8*2+48]\n\teor\tx2, x16, x28\n\tldp\tx13, x14, [x1,#8*4+48]\n\n\teor\tx9, x9, x28\n\tsub\tx2, x2, x28\n\teor\tx10, x10, x28\n\tadds\tx9, x9, x28, lsr#63\n\teor\tx11, x11, 
x28\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x28\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x28\n\tumulh\tx22, x9, x2\n\tadcs\tx12, x12, xzr\n\tumulh\tx23, x10, x2\n\teor\tx14, x14, x28\n\tmul\tx9, x9, x2\n\tadcs\tx13, x13, xzr\n\tmul\tx10, x10, x2\n\tadc\tx14, x14, xzr\n\n\tumulh\tx24, x11, x2\n\tand\tx28, x28, x2\n\tumulh\tx25, x12, x2\n\tadds\tx10, x10, x22\n\tmul\tx11, x11, x2\n\tumulh\tx22, x13, x2\n\tneg\tx28, x28\n\tmul\tx12, x12, x2\n\tadcs\tx11, x11, x23\n\tumulh\tx23, x14, x2\n\tmul\tx13, x13, x2\n\tadcs\tx12, x12, x24\n\tmul\tx14, x14, x2\n\tadcs\tx13, x13, x25\n\tadcs\tx14, x14, x22\n\tadc\tx28, x23, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x27,   x28\n\n\textr\tx3, x4, x3, #62\n\textr\tx4, x5, x4, #62\n\textr\tx5, x6, x5, #62\n\tasr\tx28, x9, #63\n\textr\tx6, x7, x6, #62\n\textr\tx7, x8, x7, #62\n\textr\tx8, x9, x8, #62\n\n\teor\tx3, x3, x28\n\teor\tx4, x4, x28\n\tadds\tx3, x3, x28, lsr#63\n\teor\tx5, x5, x28\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x28\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x28\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x28\n\tstp\tx3, x4, [x0,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x0,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x0,#8*4]\n\n\teor\tx15, x15, x28\n\teor\tx16, x16, x28\n\tsub\tx15, x15, x28\n\tsub\tx16, x16, x28\n\n\tret\n\tENDP\n\n\tALIGN\t16\n|__ab_approximation_62| PROC\n\tldp\tx7, x8, [x1,#8*4]\n\tldp\tx13, x14, [x1,#8*10]\n\tldp\tx5, x6, [x1,#8*2]\n\tldp\tx11, x12, [x1,#8*8]\n\n|$Lab_approximation_62_loaded|\n\torr\tx22, x8, x14\n\tcmp\tx22, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x6\n\torr\tx22, x8, x14\n\tcselne\tx13,x13,x12\n\n\tldp\tx3, x4, [x1,#8*0]\n\tldp\tx9, x10, [x1,#8*6]\n\n\tcmp\tx22, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x5\n\torr\tx22, x8, x14\n\tcselne\tx13,x13,x11\n\n\tcmp\tx22, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x4\n\torr\tx22, x8, 
x14\n\tcselne\tx13,x13,x10\n\n\tclz\tx22, x22\n\tcmp\tx22, #64\n\tcselne\tx22,x22,xzr\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tneg\tx23, x22\n\n\tlslv\tx8, x8, x22\n\tlslv\tx14, x14, x22\n\tlsrv\tx7, x7, x23\n\tlsrv\tx13, x13, x23\n\tand\tx7, x7, x23, asr#6\n\tand\tx13, x13, x23, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tb\t__inner_loop_62\n\tret\n\tENDP\n\n\tALIGN\t16\n|__inner_loop_62| PROC\n\tmov\tx15, #1\n\tmov\tx16, #0\n\tmov\tx17, #0\n\tmov\tx19, #1\n\n|$Loop_62|\n\tsbfx\tx28, x3, #0, #1\n\tsub\tx2, x2, #1\n\tsubs\tx24, x9, x3\n\tand\tx22, x9, x28\n\tsbc\tx25, x14, x8\n\tand\tx23, x14, x28\n\tsubs\tx26, x3, x22\n\tmov\tx22, x15\n\tsbcs\tx27, x8, x23\n\tmov\tx23, x16\n\tcselhs\tx9,x9,x3\n\tcselhs\tx14,x14,x8\n\tcselhs\tx3,x26,x24\n\tcselhs\tx8,x27,x25\n\tcselhs\tx15,x15,x17\n\tcselhs\tx17,x17,x22\n\tcselhs\tx16,x16,x19\n\tcselhs\tx19,x19,x23\n\textr\tx3, x8, x3, #1\n\tlsr\tx8, x8, #1\n\tand\tx22, x17, x28\n\tand\tx23, x19, x28\n\tadd\tx17, x17, x17\n\tadd\tx19, x19, x19\n\tsub\tx15, x15, x22\n\tsub\tx16, x16, x23\n\tcbnz\tx2, |$Loop_62|\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/ct_is_square_mod_384-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|ct_is_square_mod_384|[FUNC]\n\tALIGN\t32\n|ct_is_square_mod_384| PROC\n\thint\t#25\n\tstp\tx29, x30, [sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29, sp, #0\n\tstp\tx19, x20, [sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21, x22, [sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23, x24, [sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25, x26, [sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27, x28, [sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp, sp, #512\n\n\tldp\tx3, x4, [x0,#8*0]\n\tldp\tx5, x6, [x0,#8*2]\n\tldp\tx7, x8, [x0,#8*4]\n\n\tadd\tx0, sp, #255\n\tand\tx0, x0, #-256\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n endif\n\n\tldp\tx9, x10, [x1,#8*0]\n\tldp\tx11, x12, [x1,#8*2]\n\tldp\tx13, x14, [x1,#8*4]\n\n\tstp\tx3, x4, [x0,#8*6]\n\tstp\tx5, x6, [x0,#8*8]\n\tstp\tx7, x8, [x0,#8*10]\n\tstp\tx9, x10, [x0,#8*0]\n\tstp\tx11, x12, [x0,#8*2]\n\tstp\tx13, x14, [x0,#8*4]\n\n\teor\tx2, x2, x2\n\tmov\tx15, #24\n\tb\t|$Loop_is_square|\n\n\tALIGN\t16\n|$Loop_is_square|\n\tbl\t__ab_approximation_30\n\tsub\tx15, x15, #1\n\n\teor\tx1, x0, #128\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc1,csp,x1\n endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\tx19, x16\n\tmov\tx20, x17\n\tadd\tx1,x1,#8*6\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\tx9, x10, [x1,#-8*6]\n\teor\tx0, x0, #128\n if :def:\t__CHERI_PURE_CAPABILITY__\n\tscvalue\tc0,csp,x0\n endif\n\tand\tx27, x27, x9\n\tadd\tx2, x2, x27, lsr#1\n\n\tcbnz\tx15, |$Loop_is_square|\n\n\n\n\n\n\tmov\tx15, #48\n\tbl\t__inner_loop_48\n\tldr\tx30, [x29,#__SIZEOF_POINTER__]\n\n\tand\tx0, x2, #1\n\teor\tx0, x0, #1\n\n\tadd\tsp, sp, #512\n\tldp\tx19, x20, [x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21, x22, [x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23, x24, [x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25, x26, [x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27, x28, [x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29, 
[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__smul_384_n_shift_by_30| PROC\n\tldp\tx3, x4, [x0,#8*0+0]\n\tasr\tx27, x20, #63\n\tldp\tx5, x6, [x0,#8*2+0]\n\teor\tx20, x20, x27\n\tldp\tx7, x8, [x0,#8*4+0]\n\n\teor\tx3, x3, x27\n\tsub\tx20, x20, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tumulh\tx21, x3, x20\n\tadcs\tx6, x6, xzr\n\tumulh\tx22, x4, x20\n\teor\tx8, x8, x27\n\tumulh\tx23, x5, x20\n\tadcs\tx7, x7, xzr\n\tumulh\tx24, x6, x20\n\tadc\tx8, x8, xzr\n\n\tumulh\tx25, x7, x20\n\tand\tx28, x20, x27\n\tumulh\tx26, x8, x20\n\tneg\tx28, x28\n\tmul\tx3, x3, x20\n\tmul\tx4, x4, x20\n\tmul\tx5, x5, x20\n\tadds\tx4, x4, x21\n\tmul\tx6, x6, x20\n\tadcs\tx5, x5, x22\n\tmul\tx7, x7, x20\n\tadcs\tx6, x6, x23\n\tmul\tx8, x8, x20\n\tadcs\tx7, x7, x24\n\tadcs\tx8, x8 ,x25\n\tadc\tx26, x26, x28\n\tldp\tx9, x10, [x0,#8*0+48]\n\tasr\tx27, x19, #63\n\tldp\tx11, x12, [x0,#8*2+48]\n\teor\tx19, x19, x27\n\tldp\tx13, x14, [x0,#8*4+48]\n\n\teor\tx9, x9, x27\n\tsub\tx19, x19, x27\n\teor\tx10, x10, x27\n\tadds\tx9, x9, x27, lsr#63\n\teor\tx11, x11, x27\n\tadcs\tx10, x10, xzr\n\teor\tx12, x12, x27\n\tadcs\tx11, x11, xzr\n\teor\tx13, x13, x27\n\tumulh\tx21, x9, x19\n\tadcs\tx12, x12, xzr\n\tumulh\tx22, x10, x19\n\teor\tx14, x14, x27\n\tumulh\tx23, x11, x19\n\tadcs\tx13, x13, xzr\n\tumulh\tx24, x12, x19\n\tadc\tx14, x14, xzr\n\n\tumulh\tx25, x13, x19\n\tand\tx28, x19, x27\n\tumulh\tx27, x14, x19\n\tneg\tx28, x28\n\tmul\tx9, x9, x19\n\tmul\tx10, x10, x19\n\tmul\tx11, x11, x19\n\tadds\tx10, x10, x21\n\tmul\tx12, x12, x19\n\tadcs\tx11, x11, x22\n\tmul\tx13, x13, x19\n\tadcs\tx12, x12, x23\n\tmul\tx14, x14, x19\n\tadcs\tx13, x13, x24\n\tadcs\tx14, x14 ,x25\n\tadc\tx27, x27, x28\n\tadds\tx3, x3, x9\n\tadcs\tx4, x4, x10\n\tadcs\tx5, x5, x11\n\tadcs\tx6, x6, x12\n\tadcs\tx7, x7, x13\n\tadcs\tx8, x8, x14\n\tadc\tx9, x26,   x27\n\n\textr\tx3, x4, x3, 
#30\n\textr\tx4, x5, x4, #30\n\textr\tx5, x6, x5, #30\n\tasr\tx27, x9, #63\n\textr\tx6, x7, x6, #30\n\textr\tx7, x8, x7, #30\n\textr\tx8, x9, x8, #30\n\n\teor\tx3, x3, x27\n\teor\tx4, x4, x27\n\tadds\tx3, x3, x27, lsr#63\n\teor\tx5, x5, x27\n\tadcs\tx4, x4, xzr\n\teor\tx6, x6, x27\n\tadcs\tx5, x5, xzr\n\teor\tx7, x7, x27\n\tadcs\tx6, x6, xzr\n\teor\tx8, x8, x27\n\tstp\tx3, x4, [x1,#8*0]\n\tadcs\tx7, x7, xzr\n\tstp\tx5, x6, [x1,#8*2]\n\tadc\tx8, x8, xzr\n\tstp\tx7, x8, [x1,#8*4]\n\n\tret\n\tENDP\n\n\tALIGN\t16\n|__ab_approximation_30| PROC\n\tldp\tx13, x14, [x0,#8*4]\n\tldp\tx11, x12, [x0,#8*2]\n\n\torr\tx21, x8, x14\n\tcmp\tx21, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x6\n\torr\tx21, x8, x14\n\tcselne\tx13,x13,x12\n\n\tcmp\tx21, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x5\n\torr\tx21, x8, x14\n\tcselne\tx13,x13,x11\n\n\tcmp\tx21, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x4\n\torr\tx21, x8, x14\n\tcselne\tx13,x13,x10\n\n\tcmp\tx21, #0\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tcselne\tx7,x7,x3\n\torr\tx21, x8, x14\n\tcselne\tx13,x13,x9\n\n\tclz\tx21, x21\n\tcmp\tx21, #64\n\tcselne\tx21,x21,xzr\n\tcselne\tx8,x8,x7\n\tcselne\tx14,x14,x13\n\tneg\tx22, x21\n\n\tlslv\tx8, x8, x21\n\tlslv\tx14, x14, x21\n\tlsrv\tx7, x7, x22\n\tlsrv\tx13, x13, x22\n\tand\tx7, x7, x22, asr#6\n\tand\tx13, x13, x22, asr#6\n\torr\tx8, x8, x7\n\torr\tx14, x14, x13\n\n\tbfxil\tx8, x3, #0, #32\n\tbfxil\tx14, x9, #0, #32\n\n\tb\t__inner_loop_30\n\tret\n\tENDP\n\n\n\tALIGN\t16\n|__inner_loop_30| PROC\n\tmov\tx28, #30\n\tmov\tx17, #0x7FFFFFFF80000000\n\tmov\tx20, #0x800000007FFFFFFF\n\tmov\tx27,#0x7FFFFFFF7FFFFFFF\n\n|$Loop_30|\n\tsbfx\tx24, x8, #0, #1\n\tand\tx25, x8, x14\n\tsub\tx28, x28, #1\n\tand\tx21, x14, x24\n\n\tsub\tx22, x14, x8\n\tsubs\tx23, x8, x21\n\tadd\tx25, x2, x25, lsr#1\n\tmov\tx21, x20\n\tcselhs\tx14,x14,x8\n\tcselhs\tx8,x23,x22\n\tcselhs\tx20,x20,x17\n\tcselhs\tx17,x17,x21\n\tcselhs\tx2,x2,x25\n\tlsr\tx8, x8, 
#1\n\tand\tx21, x20, x24\n\tand\tx22, x27, x24\n\tadd\tx23, x14, #2\n\tsub\tx17, x17, x21\n\tadd\tx20, x20, x20\n\tadd\tx2, x2, x23, lsr#2\n\tadd\tx17, x17, x22\n\tsub\tx20, x20, x27\n\n\tcbnz\tx28, |$Loop_30|\n\n\tmov\tx27, #0x7FFFFFFF\n\tubfx\tx16, x17, #0, #32\n\tubfx\tx17, x17, #32, #32\n\tubfx\tx19, x20, #0, #32\n\tubfx\tx20, x20, #32, #32\n\tsub\tx16, x16, x27\n\tsub\tx17, x17, x27\n\tsub\tx19, x19, x27\n\tsub\tx20, x20, x27\n\n\tret\n\tENDP\n\n\tALIGN\t16\n|__inner_loop_48| PROC\n|$Loop_48|\n\tsbfx\tx24, x3, #0, #1\n\tand\tx25, x3, x9\n\tsub\tx15, x15, #1\n\tand\tx21, x9, x24\n\tsub\tx22, x9, x3\n\tsubs\tx23, x3, x21\n\tadd\tx25, x2, x25, lsr#1\n\tcselhs\tx9,x9,x3\n\tcselhs\tx3,x23,x22\n\tcselhs\tx2,x2,x25\n\tadd\tx23, x9, #2\n\tlsr\tx3, x3, #1\n\tadd\tx2, x2, x23, lsr#2\n\n\tcbnz\tx15, |$Loop_48|\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/ct_is_square_mod_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tct_is_square_mod_384\n\n\nALIGN\t32\nct_is_square_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_ct_is_square_mod_384::\n\n\n\tpush\trbp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,536\n\n$L$SEH_body_ct_is_square_mod_384::\n\n\n\tlea\trax,QWORD PTR[((24+255))+rsp]\n\tand\trax,-256\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\tmov\tr10,QWORD PTR[16+rdi]\n\tmov\tr11,QWORD PTR[24+rdi]\n\tmov\tr12,QWORD PTR[32+rdi]\n\tmov\tr13,QWORD PTR[40+rdi]\n\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trbx,QWORD PTR[16+rsi]\n\tmov\trcx,QWORD PTR[24+rsi]\n\tmov\trdx,QWORD PTR[32+rsi]\n\tmov\trdi,QWORD PTR[40+rsi]\n\tmov\trsi,rax\n\n\tmov\tQWORD PTR[rax],r8\n\tmov\tQWORD PTR[8+rax],r9\n\tmov\tQWORD PTR[16+rax],r10\n\tmov\tQWORD PTR[24+rax],r11\n\tmov\tQWORD PTR[32+rax],r12\n\tmov\tQWORD PTR[40+rax],r13\n\n\tmov\tQWORD PTR[48+rax],r14\n\tmov\tQWORD PTR[56+rax],r15\n\tmov\tQWORD PTR[64+rax],rbx\n\tmov\tQWORD PTR[72+rax],rcx\n\tmov\tQWORD PTR[80+rax],rdx\n\tmov\tQWORD PTR[88+rax],rdi\n\n\txor\trbp,rbp\n\tmov\tecx,24\n\tjmp\t$L$oop_is_square\n\nALIGN\t32\n$L$oop_is_square::\n\tmov\tDWORD PTR[16+rsp],ecx\n\n\tcall\t__ab_approximation_30\n\tmov\tQWORD PTR[rsp],rax\n\tmov\tQWORD PTR[8+rsp],rbx\n\n\tmov\trdi,128+8*6\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmov\trdx,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmov\tecx,DWORD PTR[16+rsp]\n\txor\trsi,128\n\n\tand\tr14,QWORD PTR[48+rdi]\n\tshr\tr14,1\n\tadd\trbp,r14\n\n\tsub\tecx,1\n\tjnz\t$L$oop_is_square\n\n\n\n\n\tmov\tr9,QWORD 
PTR[48+rsi]\n\tcall\t__inner_loop_48\n\n\tmov\trax,1\n\tand\trax,rbp\n\txor\trax,1\n\n\tlea\tr8,QWORD PTR[536+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_ct_is_square_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_ct_is_square_mod_384::\nct_is_square_mod_384\tENDP\n\n\nALIGN\t32\n__smulq_384_n_shift_by_30\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\trbx,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbx,rdx\n\tadd\trbx,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tr14,rdx\n\tand\tr14,rbx\n\tmul\trbx\n\tmov\tr8,rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trbx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbx\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbx\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tneg\tr14\n\tmul\trbx\n\tadd\tr13,rax\n\tadc\tr14,rdx\n\tlea\trsi,QWORD PTR[48+rsi]\n\tmov\trdx,rcx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD 
PTR[40+rsi]\n\n\tmov\trbx,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbx,rdx\n\tadd\trbx,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tr15,rdx\n\tand\tr15,rbx\n\tmul\trbx\n\tmov\tr8,rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trbx\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbx\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbx\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbx\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tneg\tr15\n\tmul\trbx\n\tadd\tr13,rax\n\tadc\tr15,rdx\n\tlea\trsi,QWORD PTR[((-48))+rsi]\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,QWORD PTR[40+rdi]\n\tadc\tr14,r15\n\n\tshrd\tr8,r9,30\n\tshrd\tr9,r10,30\n\tshrd\tr10,r11,30\n\tshrd\tr11,r12,30\n\tshrd\tr12,r13,30\n\tshrd\tr13,r14,30\n\n\tsar\tr14,63\n\txor\trbx,rbx\n\tsub\trbx,r14\n\n\txor\tr8,r14\n\txor\tr9,r14\n\txor\tr10,r14\n\txor\tr11,r14\n\txor\tr12,r14\n\txor\tr13,r14\n\tadd\tr8,rbx\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_384_n_shift_by_30\tENDP\n\nALIGN\t32\n__ab_approximation_30\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbx,QWORD PTR[88+rsi]\n\tmov\tr15,QWORD PTR[80+rsi]\n\tmov\tr14,QWORD PTR[72+rsi]\n\n\tmov\trax,r13\n\tor\trax,rbx\n\tcmovz\tr13,r12\n\tcmovz\trbx,r15\n\tcmovz\tr12,r11\n\tmov\tr11,QWORD 
PTR[64+rsi]\n\tcmovz\tr15,r14\n\n\tmov\trax,r13\n\tor\trax,rbx\n\tcmovz\tr13,r12\n\tcmovz\trbx,r15\n\tcmovz\tr12,r10\n\tmov\tr10,QWORD PTR[56+rsi]\n\tcmovz\tr15,r11\n\n\tmov\trax,r13\n\tor\trax,rbx\n\tcmovz\tr13,r12\n\tcmovz\trbx,r15\n\tcmovz\tr12,r9\n\tmov\tr9,QWORD PTR[48+rsi]\n\tcmovz\tr15,r10\n\n\tmov\trax,r13\n\tor\trax,rbx\n\tcmovz\tr13,r12\n\tcmovz\trbx,r15\n\tcmovz\tr12,r8\n\tcmovz\tr15,r9\n\n\tmov\trax,r13\n\tor\trax,rbx\n\tbsr\trcx,rax\n\tlea\trcx,QWORD PTR[1+rcx]\n\tcmovz\tr13,r8\n\tcmovz\trbx,r9\n\tcmovz\trcx,rax\n\tneg\trcx\n\n\n\tshld\tr13,r12,cl\n\tshld\trbx,r15,cl\n\n\tmov\trax,0FFFFFFFF00000000h\n\tmov\tr8d,r8d\n\tmov\tr9d,r9d\n\tand\tr13,rax\n\tand\trbx,rax\n\tor\tr8,r13\n\tor\tr9,rbx\n\n\tjmp\t__inner_loop_30\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__ab_approximation_30\tENDP\n\nALIGN\t32\n__inner_loop_30\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbx,07FFFFFFF80000000h\n\tmov\trcx,0800000007FFFFFFFh\n\tlea\tr15,QWORD PTR[((-1))+rbx]\n\tmov\tedi,30\n\n$L$oop_30::\n\tmov\trax,r8\n\tand\trax,r9\n\tshr\trax,1\n\n\tcmp\tr8,r9\n\tmov\tr10,r8\n\tmov\tr11,r9\n\tlea\trax,QWORD PTR[rbp*1+rax]\n\tmov\tr12,rbx\n\tmov\tr13,rcx\n\tmov\tr14,rbp\n\tcmovb\tr8,r9\n\tcmovb\tr9,r10\n\tcmovb\trbx,rcx\n\tcmovb\trcx,r12\n\tcmovb\trbp,rax\n\n\tsub\tr8,r9\n\tsub\trbx,rcx\n\tadd\trbx,r15\n\n\ttest\tr10,1\n\tcmovz\tr8,r10\n\tcmovz\tr9,r11\n\tcmovz\trbx,r12\n\tcmovz\trcx,r13\n\tcmovz\trbp,r14\n\n\tlea\trax,QWORD PTR[2+r9]\n\tshr\tr8,1\n\tshr\trax,2\n\tadd\trcx,rcx\n\tlea\trbp,QWORD PTR[rbp*1+rax]\n\tsub\trcx,r15\n\n\tsub\tedi,1\n\tjnz\t$L$oop_30\n\n\tshr\tr15,32\n\tmov\teax,ebx\n\tshr\trbx,32\n\tmov\tedx,ecx\n\tshr\trcx,32\n\tsub\trax,r15\n\tsub\trbx,r15\n\tsub\trdx,r15\n\tsub\trcx,r15\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_30\tENDP\n\n\nALIGN\t32\n__inner_loop_48\tPROC 
PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tedi,48\n\n$L$oop_48::\n\tmov\trax,r8\n\tand\trax,r9\n\tshr\trax,1\n\n\tcmp\tr8,r9\n\tmov\tr10,r8\n\tmov\tr11,r9\n\tlea\trax,QWORD PTR[rbp*1+rax]\n\tmov\tr12,rbp\n\tcmovb\tr8,r9\n\tcmovb\tr9,r10\n\tcmovb\trbp,rax\n\n\tsub\tr8,r9\n\n\ttest\tr10,1\n\tcmovz\tr8,r10\n\tcmovz\tr9,r11\n\tcmovz\trbp,r12\n\n\tlea\trax,QWORD PTR[2+r9]\n\tshr\tr8,1\n\tshr\trax,2\n\tadd\trbp,rax\n\n\tsub\tedi,1\n\tjnz\t$L$oop_48\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_48\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_body_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_info_ct_is_square_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_epilogue_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_info_ct_is_square_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_end_ct_is_square_mod_384\n\tDD\timagerel $L$SEH_info_ct_is_square_mod_384_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_ct_is_square_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_ct_is_square_mod_384_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,043h,000h\nDB\t000h,0e4h,044h,000h\nDB\t000h,0d4h,045h,000h\nDB\t000h,0c4h,046h,000h\nDB\t000h,034h,047h,000h\nDB\t000h,054h,048h,000h\nDB\t000h,074h,04ah,000h\nDB\t000h,064h,04bh,000h\nDB\t000h,001h,049h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_ct_is_square_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/ctq_inverse_mod_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\nEXTERN\tct_inverse_mod_384$1:NEAR\n_DATA\tSEGMENT\nCOMM\t__blst_platform_cap:DWORD:1\n_DATA\tENDS\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tct_inverse_mod_384\n\n\nALIGN\t32\nct_inverse_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_ct_inverse_mod_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tct_inverse_mod_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,1112\n\n$L$SEH_body_ct_inverse_mod_384::\n\n\n\tlea\trax,QWORD PTR[((88+511))+rsp]\n\tand\trax,-512\n\tmov\tQWORD PTR[32+rsp],rdi\n\tmov\tQWORD PTR[40+rsp],rcx\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[8+rdx]\n\tmov\trbx,QWORD PTR[16+rdx]\n\tmov\trbp,QWORD PTR[24+rdx]\n\tmov\trsi,QWORD PTR[32+rdx]\n\tmov\trdi,QWORD PTR[40+rdx]\n\n\tmov\tQWORD PTR[rax],r8\n\tmov\tQWORD PTR[8+rax],r9\n\tmov\tQWORD PTR[16+rax],r10\n\tmov\tQWORD PTR[24+rax],r11\n\tmov\tQWORD PTR[32+rax],r12\n\tmov\tQWORD PTR[40+rax],r13\n\n\tmov\tQWORD PTR[48+rax],r14\n\tmov\tQWORD PTR[56+rax],r15\n\tmov\tQWORD PTR[64+rax],rbx\n\tmov\tQWORD PTR[72+rax],rbp\n\tmov\tQWORD PTR[80+rax],rsi\n\tmov\trsi,rax\n\tmov\tQWORD PTR[88+rax],rdi\n\n\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmov\tQWORD PTR[96+rdi],rdx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\tmov\tQWORD 
PTR[104+rdi],rdx\n\n\n\txor\trsi,256\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\n\n\n\tmov\trax,QWORD PTR[96+rsi]\n\tmov\tr11,QWORD PTR[152+rsi]\n\tmov\trbx,rdx\n\tmov\tr10,rax\n\timul\tQWORD PTR[56+rsp]\n\tmov\tr8,rax\n\tmov\trax,r11\n\tmov\tr9,rdx\n\timul\tQWORD PTR[64+rsp]\n\tadd\tr8,rax\n\tadc\tr9,rdx\n\tmov\tQWORD PTR[48+rdi],r8\n\tmov\tQWORD PTR[56+rdi],r9\n\tsar\tr9,63\n\tmov\tQWORD PTR[64+rdi],r9\n\tmov\tQWORD PTR[72+rdi],r9\n\tmov\tQWORD PTR[80+rdi],r9\n\tmov\tQWORD PTR[88+rdi],r9\n\tmov\tQWORD PTR[96+rdi],r9\n\tlea\trsi,QWORD PTR[96+rsi]\n\n\tmov\trax,r10\n\timul\trbx\n\tmov\tr8,rax\n\tmov\trax,r11\n\tmov\tr9,rdx\n\timul\trcx\n\tadd\tr8,rax\n\tadc\tr9,rdx\n\tmov\tQWORD PTR[104+rdi],r8\n\tmov\tQWORD PTR[112+rdi],r9\n\tsar\tr9,63\n\tmov\tQWORD PTR[120+rdi],r9\n\tmov\tQWORD PTR[128+rdi],r9\n\tmov\tQWORD PTR[136+rdi],r9\n\tmov\tQWORD PTR[144+rdi],r9\n\tmov\tQWORD PTR[152+rdi],r9\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD 
PTR[56+rdi]\n\tcall\t__smulq_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD 
PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_384x63\n\tmov\tQWORD PTR[56+rdi],r14\n\tmov\tQWORD PTR[64+rdi],r14\n\tmov\tQWORD PTR[72+rdi],r14\n\tmov\tQWORD PTR[80+rdi],r14\n\tmov\tQWORD PTR[88+rdi],r14\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD 
PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\tcall\t__ab_approximation_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\n\txor\trsi,256+8*12\n\tmov\tedi,62\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD 
PTR[48+rsi]\n\tmov\tr11,QWORD PTR[56+rsi]\n\tcall\t__inner_loop_62\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[48+rdi],r10\n\n\n\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[96+rdi]\n\tcall\t__smulq_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulq_768x63\n\n\n\txor\trsi,256+8*12\n\tmov\tedi,24\n\n\tmov\tr8,QWORD PTR[rsi]\n\txor\tr9,r9\n\tmov\tr10,QWORD PTR[48+rsi]\n\txor\tr11,r11\n\tcall\t__inner_loop_62\n\n\n\n\n\n\n\n\tlea\trsi,QWORD PTR[96+rsi]\n\n\n\n\n\n\tmov\trdx,r12\n\tmov\trcx,r13\n\tmov\trdi,QWORD PTR[32+rsp]\n\tcall\t__smulq_768x63\n\n\tmov\trsi,QWORD PTR[40+rsp]\n\tmov\tr13,rdx\n\tsar\tr13,63\n\n\tmov\tr8,r13\n\tmov\tr9,r13\n\tmov\tr10,r13\n\tand\tr8,QWORD PTR[rsi]\n\tand\tr9,QWORD PTR[8+rsi]\n\tmov\tr11,r13\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,r13\n\tand\tr12,QWORD PTR[32+rsi]\n\tand\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tadc\trbx,r10\n\tadc\trbp,r11\n\tadc\trcx,r12\n\tadc\trax,r13\n\tadc\trdx,0\n\n\tmov\tr13,rdx\n\tneg\trdx\n\tor\tr13,rdx\n\tsar\trdx,63\n\n\tmov\tr8,r13\n\tmov\tr9,r13\n\tmov\tr10,r13\n\tand\tr8,QWORD PTR[rsi]\n\tand\tr9,QWORD PTR[8+rsi]\n\tmov\tr11,r13\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,r13\n\tand\tr12,QWORD PTR[32+rsi]\n\tand\tr13,QWORD PTR[40+rsi]\n\n\txor\tr8,rdx\n\txor\trsi,rsi\n\txor\tr9,rdx\n\tsub\trsi,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tadd\tr8,rsi\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tadc\trbx,r10\n\tadc\trbp,r11\n\tadc\trcx,r12\n\tadc\trax,r13\n\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\tmov\tQWORD PTR[64+rdi],rbx\n\tmov\tQWORD PTR[72+rdi],rbp\n\tmov\tQWORD PTR[80+rdi],rcx\n\tmov\tQWORD PTR[88+rdi],rax\n\n\tlea\tr8,QWORD 
PTR[1112+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_ct_inverse_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_ct_inverse_mod_384::\nct_inverse_mod_384\tENDP\n\nALIGN\t32\n__smulq_768x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tmov\trbp,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\tmov\tQWORD PTR[8+rsp],rdi\n\tmov\tQWORD PTR[16+rsp],rsi\n\tlea\trsi,QWORD PTR[56+rsi]\n\n\txor\trbp,rdx\n\tadd\trbp,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tmul\trbp\n\tmov\tQWORD PTR[rdi],rax\n\tmov\trax,r9\n\tand\tr14,rbp\n\tneg\tr14\n\tmov\tr9,rdx\n\tmul\trbp\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmov\tQWORD PTR[8+rdi],r9\n\tmul\trbp\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmov\tQWORD PTR[16+rdi],r10\n\tmul\trbp\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\tQWORD PTR[24+rdi],r11\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmov\tQWORD PTR[32+rdi],r12\n\tmul\trbp\n\tadd\tr13,rax\n\tadc\tr14,rdx\n\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tsar\tr14,63\n\tmov\tQWORD PTR[56+rdi],r14\n\tmov\trdx,rcx\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD 
PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tmov\trbx,QWORD PTR[64+rsi]\n\tmov\trbp,QWORD PTR[72+rsi]\n\tmov\trcx,QWORD PTR[80+rsi]\n\tmov\trdi,QWORD PTR[88+rsi]\n\n\tmov\trsi,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trsi,rdx\n\tadd\trsi,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\txor\tr15,rdx\n\txor\trbx,rdx\n\txor\trbp,rdx\n\txor\trcx,rdx\n\txor\trdi,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\tadc\tr15,0\n\tadc\trbx,0\n\tadc\trbp,0\n\tadc\trcx,0\n\tadc\trdi,0\n\n\tmul\trsi\n\tmov\tr8,rax\n\tmov\trax,r9\n\tmov\tr9,rdx\n\tmul\trsi\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trsi\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trsi\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trsi\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trsi\n\tadd\tr13,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\tmul\trsi\n\tadd\tr14,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\tmul\trsi\n\tadd\tr15,rax\n\tmov\trax,rbx\n\tadc\trdx,0\n\tmov\trbx,rdx\n\tmul\trsi\n\tadd\trbx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\tmul\trsi\n\tadd\trbp,rax\n\tmov\trax,rcx\n\tadc\trdx,0\n\tmov\trcx,rdx\n\tmul\trsi\n\tadd\trcx,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\trdi,rdx\n\timul\trsi\n\tmov\trsi,QWORD PTR[8+rsp]\n\tadd\trax,rdi\n\tadc\trdx,0\n\n\tadd\tr8,QWORD PTR[rsi]\n\tadc\tr9,QWORD PTR[8+rsi]\n\tadc\tr10,QWORD PTR[16+rsi]\n\tadc\tr11,QWORD PTR[24+rsi]\n\tadc\tr12,QWORD PTR[32+rsi]\n\tadc\tr13,QWORD PTR[40+rsi]\n\tadc\tr14,QWORD PTR[48+rsi]\n\tmov\trdi,QWORD PTR[56+rsi]\n\tadc\tr15,rdi\n\tadc\trbx,rdi\n\tadc\trbp,rdi\n\tadc\trcx,rdi\n\tadc\trax,rdi\n\tadc\trdx,rdi\n\n\tlea\trdi,QWORD PTR[rsi]\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD 
PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\tmov\tQWORD PTR[64+rdi],rbx\n\tmov\tQWORD PTR[72+rdi],rbp\n\tmov\tQWORD PTR[80+rdi],rcx\n\tmov\tQWORD PTR[88+rdi],rax\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_768x63\tENDP\n\nALIGN\t32\n__smulq_384x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tmov\trbp,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbp,rdx\n\tadd\trbp,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tmul\trbp\n\tmov\tr8,rax\n\tmov\trax,r9\n\tand\tr14,rbp\n\tneg\tr14\n\tmov\tr9,rdx\n\tmul\trbp\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbp\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbp\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trbp\n\tadd\tr13,rax\n\tadc\tr14,rdx\n\n\tlea\trsi,QWORD PTR[56+rsi]\n\tmov\trdx,rcx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tr15,r13\n\tmov\trbx,r14\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD 
PTR[48+rsi]\n\n\tmov\trbp,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbp,rdx\n\tadd\trbp,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\txor\tr14,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tmul\trbp\n\tmov\tr8,rax\n\tmov\trax,r9\n\tand\tr14,rbp\n\tneg\tr14\n\tmov\tr9,rdx\n\tmul\trbp\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbp\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbp\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trbp\n\tadd\tr13,rax\n\tadc\tr14,rdx\n\n\tlea\trsi,QWORD PTR[((-56))+rsi]\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,r15\n\tadc\tr14,rbx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_384x63\tENDP\n\nALIGN\t32\n__smulq_384_n_shift_by_62\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbx,rdx\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD 
PTR[40+rsi]\n\n\tmov\trbp,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbp,rdx\n\tadd\trbp,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tmov\tr14,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmul\trbp\n\tmov\tr8,rax\n\tmov\trax,r9\n\tand\tr14,rbp\n\tneg\tr14\n\tmov\tr9,rdx\n\tmul\trbp\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbp\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbp\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trbp\n\tadd\tr13,rax\n\tadc\tr14,rdx\n\n\tlea\trsi,QWORD PTR[48+rsi]\n\tmov\trdx,rcx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\trbp,rdx\n\tsar\trdx,63\n\txor\trax,rax\n\tsub\trax,rdx\n\n\txor\trbp,rdx\n\tadd\trbp,rax\n\n\txor\tr8,rdx\n\txor\tr9,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tmov\tr15,rdx\n\tadd\trax,r8\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmul\trbp\n\tmov\tr8,rax\n\tmov\trax,r9\n\tand\tr15,rbp\n\tneg\tr15\n\tmov\tr9,rdx\n\tmul\trbp\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\tmul\trbp\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\tmul\trbp\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\tmul\trbp\n\tadd\tr13,rax\n\tadc\tr15,rdx\n\n\tlea\trsi,QWORD PTR[((-48))+rsi]\n\tmov\trdx,rbx\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD 
PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,QWORD PTR[40+rdi]\n\tadc\tr14,r15\n\n\tshrd\tr8,r9,62\n\tshrd\tr9,r10,62\n\tshrd\tr10,r11,62\n\tshrd\tr11,r12,62\n\tshrd\tr12,r13,62\n\tshrd\tr13,r14,62\n\n\tsar\tr14,63\n\txor\trbp,rbp\n\tsub\trbp,r14\n\n\txor\tr8,r14\n\txor\tr9,r14\n\txor\tr10,r14\n\txor\tr11,r14\n\txor\tr12,r14\n\txor\tr13,r14\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\txor\trdx,r14\n\txor\trcx,r14\n\tadd\trdx,rbp\n\tadd\trcx,rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulq_384_n_shift_by_62\tENDP\n\nALIGN\t32\n__ab_approximation_62\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr9,QWORD PTR[40+rsi]\n\tmov\tr11,QWORD PTR[88+rsi]\n\tmov\trbx,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tmov\tr8,QWORD PTR[24+rsi]\n\tmov\tr10,QWORD PTR[72+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tcmovz\trbp,r10\n\tmov\tr8,QWORD PTR[16+rsi]\n\tmov\tr10,QWORD PTR[64+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tcmovz\trbp,r10\n\tmov\tr8,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[56+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tcmovz\trbp,r10\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr10,QWORD PTR[48+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tbsr\trcx,rax\n\tlea\trcx,QWORD PTR[1+rcx]\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trcx,rax\n\tneg\trcx\n\n\n\tshld\tr9,rbx,cl\n\tshld\tr11,rbp,cl\n\n\tjmp\t__inner_loop_62\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__ab_approximation_62\tENDP\n\nALIGN\t8\n\tDD\t0\n__inner_loop_62\tPROC 
PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trdx,1\n\txor\trcx,rcx\n\txor\tr12,r12\n\tmov\tr13,1\n\tmov\tQWORD PTR[8+rsp],rsi\n\n$L$oop_62::\n\txor\trax,rax\n\txor\trbx,rbx\n\ttest\tr8,1\n\tmov\trbp,r10\n\tmov\tr14,r11\n\tcmovnz\trax,r10\n\tcmovnz\trbx,r11\n\tsub\trbp,r8\n\tsbb\tr14,r9\n\tmov\tr15,r8\n\tmov\trsi,r9\n\tsub\tr8,rax\n\tsbb\tr9,rbx\n\tcmovc\tr8,rbp\n\tcmovc\tr9,r14\n\tcmovc\tr10,r15\n\tcmovc\tr11,rsi\n\tmov\trax,rdx\n\tcmovc\trdx,r12\n\tcmovc\tr12,rax\n\tmov\trbx,rcx\n\tcmovc\trcx,r13\n\tcmovc\tr13,rbx\n\txor\trax,rax\n\txor\trbx,rbx\n\tshrd\tr8,r9,1\n\tshr\tr9,1\n\ttest\tr15,1\n\tcmovnz\trax,r12\n\tcmovnz\trbx,r13\n\tadd\tr12,r12\n\tadd\tr13,r13\n\tsub\trdx,rax\n\tsub\trcx,rbx\n\tsub\tedi,1\n\tjnz\t$L$oop_62\n\n\tmov\trsi,QWORD PTR[8+rsp]\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trax\n\tlfence\n\tjmp\trax\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_62\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_body_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_epilogue_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_end_ct_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ct_inverse_mod_384_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY 
ALIGN(8)\nALIGN\t8\n$L$SEH_info_ct_inverse_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_ct_inverse_mod_384_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,08bh,000h\nDB\t000h,0e4h,08ch,000h\nDB\t000h,0d4h,08dh,000h\nDB\t000h,0c4h,08eh,000h\nDB\t000h,034h,08fh,000h\nDB\t000h,054h,090h,000h\nDB\t000h,074h,092h,000h\nDB\t000h,064h,093h,000h\nDB\t000h,001h,091h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_ct_inverse_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/ctx_inverse_mod_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\nPUBLIC\tct_inverse_mod_384$1\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tctx_inverse_mod_384\n\n\nALIGN\t32\nctx_inverse_mod_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_ctx_inverse_mod_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nct_inverse_mod_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,1112\n\n$L$SEH_body_ctx_inverse_mod_384::\n\n\n\tlea\trax,QWORD PTR[((88+511))+rsp]\n\tand\trax,-512\n\tmov\tQWORD PTR[32+rsp],rdi\n\tmov\tQWORD PTR[40+rsp],rcx\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[8+rdx]\n\tmov\trbx,QWORD PTR[16+rdx]\n\tmov\trbp,QWORD PTR[24+rdx]\n\tmov\trsi,QWORD PTR[32+rdx]\n\tmov\trdi,QWORD PTR[40+rdx]\n\n\tmov\tQWORD PTR[rax],r8\n\tmov\tQWORD PTR[8+rax],r9\n\tmov\tQWORD PTR[16+rax],r10\n\tmov\tQWORD PTR[24+rax],r11\n\tmov\tQWORD PTR[32+rax],r12\n\tmov\tQWORD PTR[40+rax],r13\n\n\tmov\tQWORD PTR[48+rax],r14\n\tmov\tQWORD PTR[56+rax],r15\n\tmov\tQWORD PTR[64+rax],rbx\n\tmov\tQWORD PTR[72+rax],rbp\n\tmov\tQWORD PTR[80+rax],rsi\n\tmov\trsi,rax\n\tmov\tQWORD PTR[88+rax],rdi\n\n\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmov\tQWORD PTR[96+rdi],rdx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\tmov\tQWORD PTR[104+rdi],rdx\n\n\n\txor\trsi,256\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD 
PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\n\n\n\tmov\trax,QWORD PTR[96+rsi]\n\tmov\tr11,QWORD PTR[152+rsi]\n\tmov\trbx,rdx\n\tmov\tr10,rax\n\timul\tQWORD PTR[56+rsp]\n\tmov\tr8,rax\n\tmov\trax,r11\n\tmov\tr9,rdx\n\timul\tQWORD PTR[64+rsp]\n\tadd\tr8,rax\n\tadc\tr9,rdx\n\tmov\tQWORD PTR[48+rdi],r8\n\tmov\tQWORD PTR[56+rdi],r9\n\tsar\tr9,63\n\tmov\tQWORD PTR[64+rdi],r9\n\tmov\tQWORD PTR[72+rdi],r9\n\tmov\tQWORD PTR[80+rdi],r9\n\tmov\tQWORD PTR[88+rdi],r9\n\tmov\tQWORD PTR[96+rdi],r9\n\tlea\trsi,QWORD PTR[96+rsi]\n\n\tmov\trax,r10\n\timul\trbx\n\tmov\tr8,rax\n\tmov\trax,r11\n\tmov\tr9,rdx\n\timul\trcx\n\tadd\tr8,rax\n\tadc\tr9,rdx\n\tmov\tQWORD PTR[104+rdi],r8\n\tmov\tQWORD PTR[112+rdi],r9\n\tsar\tr9,63\n\tmov\tQWORD PTR[120+rdi],r9\n\tmov\tQWORD PTR[128+rdi],r9\n\tmov\tQWORD PTR[136+rdi],r9\n\tmov\tQWORD PTR[144+rdi],r9\n\tmov\tQWORD PTR[152+rdi],r9\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD 
PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD 
PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD 
PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD 
PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_384x63\n\tmov\tQWORD PTR[56+rdi],r14\n\tmov\tQWORD PTR[64+rdi],r14\n\tmov\tQWORD PTR[72+rdi],r14\n\tmov\tQWORD PTR[80+rdi],r14\n\tmov\tQWORD PTR[88+rdi],r14\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD 
PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD 
PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD 
PTR[48+rdi]\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\txor\trsi,256+8*12\n\tmov\tedi,31\n\tcall\t__ab_approximation_31\n\n\n\tmov\tQWORD PTR[72+rsp],r12\n\tmov\tQWORD 
PTR[80+rsp],r13\n\n\tmov\trdi,256\n\txor\trdi,rsi\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[56+rsp],rdx\n\tmov\tQWORD PTR[64+rsp],rcx\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_191_n_shift_by_31\n\tmov\tQWORD PTR[72+rsp],rdx\n\tmov\tQWORD PTR[80+rsp],rcx\n\n\tmov\trdx,QWORD PTR[56+rsp]\n\tmov\trcx,QWORD PTR[64+rsp]\n\tlea\trsi,QWORD PTR[96+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__smulx_384x63\n\n\tmov\trdx,QWORD PTR[72+rsp]\n\tmov\trcx,QWORD PTR[80+rsp]\n\tlea\trdi,QWORD PTR[56+rdi]\n\tcall\t__smulx_768x63\n\n\txor\trsi,256+8*12\n\tmov\tedi,55\n\n\tmov\tr8,QWORD PTR[rsi]\n\n\tmov\tr10,QWORD PTR[48+rsi]\n\n\tcall\t__tail_loop_55\n\n\n\n\n\n\n\n\tlea\trsi,QWORD PTR[96+rsi]\n\n\n\n\n\n\tmov\trdx,r12\n\tmov\trcx,r13\n\tmov\trdi,QWORD PTR[32+rsp]\n\tcall\t__smulx_768x63\n\n\tmov\trsi,QWORD PTR[40+rsp]\n\tmov\tr13,rdx\n\tsar\tr13,63\n\n\tmov\tr8,r13\n\tmov\tr9,r13\n\tmov\tr10,r13\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tand\tr8,QWORD PTR[rsi]\n\tand\tr9,QWORD PTR[8+rsi]\n\tmov\tr11,r13\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,r13\n\tand\tr12,QWORD PTR[32+rsi]\n\tand\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tadc\trbx,r10\n\tadc\trbp,r11\n\tadc\trcx,r12\n\tadc\trax,r13\n\tadc\trdx,0\n\n\tmov\tr13,rdx\n\tneg\trdx\n\tor\tr13,rdx\n\tsar\trdx,63\n\n\tmov\tr8,r13\n\tmov\tr9,r13\n\tmov\tr10,r13\n\tand\tr8,QWORD PTR[rsi]\n\tand\tr9,QWORD PTR[8+rsi]\n\tmov\tr11,r13\n\tand\tr10,QWORD PTR[16+rsi]\n\tand\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,r13\n\tand\tr12,QWORD PTR[32+rsi]\n\tand\tr13,QWORD PTR[40+rsi]\n\n\txor\tr8,rdx\n\txor\trsi,rsi\n\txor\tr9,rdx\n\tsub\trsi,rdx\n\txor\tr10,rdx\n\txor\tr11,rdx\n\txor\tr12,rdx\n\txor\tr13,rdx\n\tadd\tr8,rsi\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tadc\trbx,r10\n\tadc\trbp,r11\n\tadc\trcx,r12\n\tadc\trax,r13\n\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD 
PTR[56+rdi],r15\n\tmov\tQWORD PTR[64+rdi],rbx\n\tmov\tQWORD PTR[72+rdi],rbp\n\tmov\tQWORD PTR[80+rdi],rcx\n\tmov\tQWORD PTR[88+rdi],rax\n\n\tlea\tr8,QWORD PTR[1112+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_ctx_inverse_mod_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_ctx_inverse_mod_384::\nctx_inverse_mod_384\tENDP\n\nALIGN\t32\n__smulx_768x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tmov\trax,rdx\n\tsar\trax,63\n\txor\trbp,rbp\n\tsub\trbp,rax\n\n\tmov\tQWORD PTR[8+rsp],rdi\n\tmov\tQWORD PTR[16+rsp],rsi\n\tlea\trsi,QWORD PTR[56+rsi]\n\n\txor\trdx,rax\n\tadd\trdx,rbp\n\n\txor\tr8,rax\n\txor\tr9,rax\n\txor\tr10,rax\n\txor\tr11,rax\n\txor\tr12,rax\n\txor\tr13,rax\n\txor\tr14,rax\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tand\tr14,rdx\n\tneg\tr14\n\n\tmulx\trbp,r8,r8\n\tmulx\trax,r9,r9\n\tadd\tr9,rbp\n\tmulx\trbp,r10,r10\n\tadc\tr10,rax\n\tmulx\trax,r11,r11\n\tadc\tr11,rbp\n\tmulx\trbp,r12,r12\n\tadc\tr12,rax\n\tmulx\trax,r13,r13\n\tadc\tr13,rbp\n\tadc\tr14,rax\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tsar\tr14,63\n\tmov\tQWORD PTR[56+rdi],r14\n\tmov\trdx,rcx\n\tmov\trax,rcx\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD 
PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tmov\trbx,QWORD PTR[64+rsi]\n\tmov\trbp,QWORD PTR[72+rsi]\n\tmov\trcx,QWORD PTR[80+rsi]\n\tmov\trdi,QWORD PTR[88+rsi]\n\n\tsar\trax,63\n\txor\trsi,rsi\n\tsub\trsi,rax\n\n\txor\trdx,rax\n\tadd\trdx,rsi\n\n\txor\tr8,rax\n\txor\tr9,rax\n\txor\tr10,rax\n\txor\tr11,rax\n\txor\tr12,rax\n\txor\tr13,rax\n\txor\tr14,rax\n\txor\tr15,rax\n\txor\trbx,rax\n\txor\trbp,rax\n\txor\trcx,rax\n\txor\trax,rdi\n\tadd\tr8,rsi\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\tadc\tr15,0\n\tadc\trbx,0\n\tadc\trbp,0\n\tadc\trcx,0\n\tadc\trax,0\n\n\tmulx\trsi,r8,r8\n\tmulx\trdi,r9,r9\n\tadd\tr9,rsi\n\tmulx\trsi,r10,r10\n\tadc\tr10,rdi\n\tmulx\trdi,r11,r11\n\tadc\tr11,rsi\n\tmulx\trsi,r12,r12\n\tadc\tr12,rdi\n\tmulx\trdi,r13,r13\n\tadc\tr13,rsi\n\tmulx\trsi,r14,r14\n\tadc\tr14,rdi\n\tmulx\trdi,r15,r15\n\tadc\tr15,rsi\n\tmulx\trsi,rbx,rbx\n\tadc\trbx,rdi\n\tmulx\trdi,rbp,rbp\n\tadc\trbp,rsi\n\tmulx\trsi,rcx,rcx\n\tadc\trcx,rdi\n\tmov\trdi,QWORD PTR[8+rsp]\n\tadc\trsi,0\n\timul\trdx\n\tadd\trax,rsi\n\tadc\trdx,0\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,QWORD PTR[40+rdi]\n\tadc\tr14,QWORD PTR[48+rdi]\n\tmov\trsi,QWORD PTR[56+rdi]\n\tadc\tr15,rsi\n\tadc\trbx,rsi\n\tadc\trbp,rsi\n\tadc\trcx,rsi\n\tadc\trax,rsi\n\tadc\trdx,rsi\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\tmov\tQWORD PTR[64+rdi],rbx\n\tmov\tQWORD PTR[72+rdi],rbp\n\tmov\tQWORD PTR[80+rdi],rcx\n\tmov\tQWORD 
PTR[88+rdi],rax\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulx_768x63\tENDP\n\nALIGN\t32\n__smulx_384x63\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[((0+0))+rsi]\n\tmov\tr9,QWORD PTR[((0+8))+rsi]\n\tmov\tr10,QWORD PTR[((0+16))+rsi]\n\tmov\tr11,QWORD PTR[((0+24))+rsi]\n\tmov\tr12,QWORD PTR[((0+32))+rsi]\n\tmov\tr13,QWORD PTR[((0+40))+rsi]\n\tmov\tr14,QWORD PTR[((0+48))+rsi]\n\n\tmov\trbp,rdx\n\tsar\trbp,63\n\txor\trax,rax\n\tsub\trax,rbp\n\n\txor\trdx,rbp\n\tadd\trdx,rax\n\n\txor\tr8,rbp\n\txor\tr9,rbp\n\txor\tr10,rbp\n\txor\tr11,rbp\n\txor\tr12,rbp\n\txor\tr13,rbp\n\txor\tr14,rbp\n\tadd\tr8,rax\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tand\tr14,rdx\n\tneg\tr14\n\n\tmulx\trbp,r8,r8\n\tmulx\trax,r9,r9\n\tadd\tr9,rbp\n\tmulx\trbp,r10,r10\n\tadc\tr10,rax\n\tmulx\trax,r11,r11\n\tadc\tr11,rbp\n\tmulx\trbp,r12,r12\n\tadc\tr12,rax\n\tmulx\trax,r13,r13\n\tmov\trdx,rcx\n\tadc\tr13,rbp\n\tadc\tr14,rax\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tr15,r13\n\tmov\trbx,r14\n\tmov\tr8,QWORD PTR[((56+0))+rsi]\n\tmov\tr9,QWORD PTR[((56+8))+rsi]\n\tmov\tr10,QWORD PTR[((56+16))+rsi]\n\tmov\tr11,QWORD PTR[((56+24))+rsi]\n\tmov\tr12,QWORD PTR[((56+32))+rsi]\n\tmov\tr13,QWORD PTR[((56+40))+rsi]\n\tmov\tr14,QWORD 
PTR[((56+48))+rsi]\n\n\tmov\trbp,rdx\n\tsar\trbp,63\n\txor\trax,rax\n\tsub\trax,rbp\n\n\txor\trdx,rbp\n\tadd\trdx,rax\n\n\txor\tr8,rbp\n\txor\tr9,rbp\n\txor\tr10,rbp\n\txor\tr11,rbp\n\txor\tr12,rbp\n\txor\tr13,rbp\n\txor\tr14,rbp\n\tadd\tr8,rax\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\tadc\tr14,0\n\n\tand\tr14,rdx\n\tneg\tr14\n\n\tmulx\trbp,r8,r8\n\tmulx\trax,r9,r9\n\tadd\tr9,rbp\n\tmulx\trbp,r10,r10\n\tadc\tr10,rax\n\tmulx\trax,r11,r11\n\tadc\tr11,rbp\n\tmulx\trbp,r12,r12\n\tadc\tr12,rax\n\tmulx\trax,r13,r13\n\tadc\tr13,rbp\n\tadc\tr14,rax\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,r15\n\tadc\tr14,rbx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tQWORD PTR[48+rdi],r14\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulx_384x63\tENDP\n\nALIGN\t32\n__smulx_384_n_shift_by_31\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbx,rdx\n\tmov\tr8,QWORD PTR[((0+0))+rsi]\n\tmov\tr9,QWORD PTR[((0+8))+rsi]\n\tmov\tr10,QWORD PTR[((0+16))+rsi]\n\tmov\tr11,QWORD PTR[((0+24))+rsi]\n\tmov\tr12,QWORD PTR[((0+32))+rsi]\n\tmov\tr13,QWORD PTR[((0+40))+rsi]\n\n\tmov\trax,rdx\n\tsar\trax,63\n\txor\trbp,rbp\n\tsub\trbp,rax\n\n\txor\trdx,rax\n\tadd\trdx,rbp\n\n\txor\tr8,rax\n\txor\tr9,rax\n\txor\tr10,rax\n\txor\tr11,rax\n\txor\tr12,rax\n\txor\tr13,rax\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tand\trax,rdx\n\tneg\trax\n\n\tmulx\trbp,r8,r8\n\tmulx\tr14,r9,r9\n\tadd\tr9,rbp\n\tmulx\trbp,r10,r10\n\tadc\tr10,r14\n\tmulx\tr14,r11,r11\n\tadc\tr11,rbp\n\tmulx\trbp,r12,r12\n\tadc\tr12,r14\n\tmulx\tr14,r13,r13\n\tadc\tr13,rbp\n\tadc\tr14,rax\n\n\tmov\trdx,rcx\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD 
PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tr15,r14\n\tmov\tr8,QWORD PTR[((48+0))+rsi]\n\tmov\tr9,QWORD PTR[((48+8))+rsi]\n\tmov\tr10,QWORD PTR[((48+16))+rsi]\n\tmov\tr11,QWORD PTR[((48+24))+rsi]\n\tmov\tr12,QWORD PTR[((48+32))+rsi]\n\tmov\tr13,QWORD PTR[((48+40))+rsi]\n\n\tmov\trax,rdx\n\tsar\trax,63\n\txor\trbp,rbp\n\tsub\trbp,rax\n\n\txor\trdx,rax\n\tadd\trdx,rbp\n\n\txor\tr8,rax\n\txor\tr9,rax\n\txor\tr10,rax\n\txor\tr11,rax\n\txor\tr12,rax\n\txor\tr13,rax\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tand\trax,rdx\n\tneg\trax\n\n\tmulx\trbp,r8,r8\n\tmulx\tr14,r9,r9\n\tadd\tr9,rbp\n\tmulx\trbp,r10,r10\n\tadc\tr10,r14\n\tmulx\tr14,r11,r11\n\tadc\tr11,rbp\n\tmulx\trbp,r12,r12\n\tadc\tr12,r14\n\tmulx\tr14,r13,r13\n\tadc\tr13,rbp\n\tadc\tr14,rax\n\n\tadd\tr8,QWORD PTR[rdi]\n\tadc\tr9,QWORD PTR[8+rdi]\n\tadc\tr10,QWORD PTR[16+rdi]\n\tadc\tr11,QWORD PTR[24+rdi]\n\tadc\tr12,QWORD PTR[32+rdi]\n\tadc\tr13,QWORD PTR[40+rdi]\n\tadc\tr14,r15\n\tmov\trdx,rbx\n\n\tshrd\tr8,r9,31\n\tshrd\tr9,r10,31\n\tshrd\tr10,r11,31\n\tshrd\tr11,r12,31\n\tshrd\tr12,r13,31\n\tshrd\tr13,r14,31\n\n\tsar\tr14,63\n\txor\trbp,rbp\n\tsub\trbp,r14\n\n\txor\tr8,r14\n\txor\tr9,r14\n\txor\tr10,r14\n\txor\tr11,r14\n\txor\tr12,r14\n\txor\tr13,r14\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\tr10,0\n\tadc\tr11,0\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tQWORD PTR[rdi],r8\n\tmov\tQWORD PTR[8+rdi],r9\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\txor\trdx,r14\n\txor\trcx,r14\n\tadd\trdx,rbp\n\tadd\trcx,rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulx_384_n_shift_by_31\tENDP\n\nALIGN\t32\n__smulx_191_n_shift_by_31\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbx,rdx\n\tmov\tr8,QWORD PTR[((0+0))+rsi]\n\tmov\tr9,QWORD 
PTR[((0+8))+rsi]\n\tmov\tr10,QWORD PTR[((0+16))+rsi]\n\n\tmov\trax,rdx\n\tsar\trax,63\n\txor\trbp,rbp\n\tsub\trbp,rax\n\n\txor\trdx,rax\n\tadd\trdx,rbp\n\n\txor\tr8,rax\n\txor\tr9,rax\n\txor\trax,r10\n\tadd\tr8,rbp\n\tadc\tr9,0\n\tadc\trax,0\n\n\tmulx\trbp,r8,r8\n\tmulx\tr10,r9,r9\n\tadd\tr9,rbp\n\tadc\tr10,0\n\timul\trdx\n\tadd\tr10,rax\n\tadc\trdx,0\n\tmov\tr14,rdx\n\tmov\trdx,rcx\n\tmov\tr11,QWORD PTR[((48+0))+rsi]\n\tmov\tr12,QWORD PTR[((48+8))+rsi]\n\tmov\tr13,QWORD PTR[((48+16))+rsi]\n\n\tmov\trax,rdx\n\tsar\trax,63\n\txor\trbp,rbp\n\tsub\trbp,rax\n\n\txor\trdx,rax\n\tadd\trdx,rbp\n\n\txor\tr11,rax\n\txor\tr12,rax\n\txor\trax,r13\n\tadd\tr11,rbp\n\tadc\tr12,0\n\tadc\trax,0\n\n\tmulx\trbp,r11,r11\n\tmulx\tr13,r12,r12\n\tadd\tr12,rbp\n\tadc\tr13,0\n\timul\trdx\n\tadd\tr13,rax\n\tadc\trdx,0\n\tadd\tr11,r8\n\tadc\tr12,r9\n\tadc\tr13,r10\n\tadc\tr14,rdx\n\tmov\trdx,rbx\n\n\tshrd\tr11,r12,31\n\tshrd\tr12,r13,31\n\tshrd\tr13,r14,31\n\n\tsar\tr14,63\n\txor\trbp,rbp\n\tsub\trbp,r14\n\n\txor\tr11,r14\n\txor\tr12,r14\n\txor\tr13,r14\n\tadd\tr11,rbp\n\tadc\tr12,0\n\tadc\tr13,0\n\n\tmov\tQWORD PTR[rdi],r11\n\tmov\tQWORD PTR[8+rdi],r12\n\tmov\tQWORD PTR[16+rdi],r13\n\n\txor\trdx,r14\n\txor\trcx,r14\n\tadd\trdx,rbp\n\tadd\trcx,rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__smulx_191_n_shift_by_31\tENDP\n\nALIGN\t32\n__ab_approximation_31\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr9,QWORD PTR[40+rsi]\n\tmov\tr11,QWORD PTR[88+rsi]\n\tmov\trbx,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tmov\tr8,QWORD PTR[24+rsi]\n\tmov\tr10,QWORD PTR[72+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tmov\tr8,QWORD PTR[16+rsi]\n\tcmovz\trbp,r10\n\tmov\tr10,QWORD PTR[64+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tmov\tr8,QWORD PTR[8+rsi]\n\tcmovz\trbp,r10\n\tmov\tr10,QWORD 
PTR[56+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tmov\tr8,QWORD PTR[rsi]\n\tcmovz\trbp,r10\n\tmov\tr10,QWORD PTR[48+rsi]\n\n\tmov\trax,r9\n\tor\trax,r11\n\tcmovz\tr9,rbx\n\tcmovz\tr11,rbp\n\tcmovz\trbx,r8\n\tcmovz\trbp,r10\n\n\tmov\trax,r9\n\tor\trax,r11\n\tbsr\trcx,rax\n\tlea\trcx,QWORD PTR[1+rcx]\n\tcmovz\tr9,r8\n\tcmovz\tr11,r10\n\tcmovz\trcx,rax\n\tneg\trcx\n\n\n\tshld\tr9,rbx,cl\n\tshld\tr11,rbp,cl\n\n\tmov\teax,07FFFFFFFh\n\tand\tr8,rax\n\tand\tr10,rax\n\tandn\tr9,rax,r9\n\tandn\tr11,rax,r11\n\tor\tr8,r9\n\tor\tr10,r11\n\n\tjmp\t__inner_loop_31\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__ab_approximation_31\tENDP\n\nALIGN\t32\n__inner_loop_31\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trcx,07FFFFFFF80000000h\n\tmov\tr13,0800000007FFFFFFFh\n\tmov\tr15,07FFFFFFF7FFFFFFFh\n\n$L$oop_31::\n\tcmp\tr8,r10\n\tmov\trax,r8\n\tmov\trbx,r10\n\tmov\trbp,rcx\n\tmov\tr14,r13\n\tcmovb\tr8,r10\n\tcmovb\tr10,rax\n\tcmovb\trcx,r13\n\tcmovb\tr13,rbp\n\n\tsub\tr8,r10\n\tsub\trcx,r13\n\tadd\trcx,r15\n\n\ttest\trax,1\n\tcmovz\tr8,rax\n\tcmovz\tr10,rbx\n\tcmovz\trcx,rbp\n\tcmovz\tr13,r14\n\n\tshr\tr8,1\n\tadd\tr13,r13\n\tsub\tr13,r15\n\tsub\tedi,1\n\tjnz\t$L$oop_31\n\n\tshr\tr15,32\n\tmov\tedx,ecx\n\tmov\tr12d,r13d\n\tshr\trcx,32\n\tshr\tr13,32\n\tsub\trdx,r15\n\tsub\trcx,r15\n\tsub\tr12,r15\n\tsub\tr13,r15\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__inner_loop_31\tENDP\n\n\nALIGN\t32\n__tail_loop_55\tPROC 
PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trdx,1\n\txor\trcx,rcx\n\txor\tr12,r12\n\tmov\tr13,1\n\n$L$oop_55::\n\txor\trax,rax\n\ttest\tr8,1\n\tmov\trbx,r10\n\tcmovnz\trax,r10\n\tsub\trbx,r8\n\tmov\trbp,r8\n\tsub\tr8,rax\n\tcmovc\tr8,rbx\n\tcmovc\tr10,rbp\n\tmov\trax,rdx\n\tcmovc\trdx,r12\n\tcmovc\tr12,rax\n\tmov\trbx,rcx\n\tcmovc\trcx,r13\n\tcmovc\tr13,rbx\n\txor\trax,rax\n\txor\trbx,rbx\n\tshr\tr8,1\n\ttest\trbp,1\n\tcmovnz\trax,r12\n\tcmovnz\trbx,r13\n\tadd\tr12,r12\n\tadd\tr13,r13\n\tsub\trdx,rax\n\tsub\trcx,rbx\n\tsub\tedi,1\n\tjnz\t$L$oop_55\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\tr8\n\tlfence\n\tjmp\tr8\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__tail_loop_55\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_body_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ctx_inverse_mod_384_prologue\n\n\tDD\timagerel $L$SEH_body_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_epilogue_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ctx_inverse_mod_384_body\n\n\tDD\timagerel $L$SEH_epilogue_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_end_ctx_inverse_mod_384\n\tDD\timagerel $L$SEH_info_ctx_inverse_mod_384_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_ctx_inverse_mod_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_ctx_inverse_mod_384_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,08bh,000h\nDB\t000h,0e4h,08ch,000h\nDB\t000h,0d4h,08dh,000h\nDB\t000h,0c4h,08eh,000h\nDB\t000h,034h,08fh,000h\nDB\t000h,054h,090h,000h\nDB\t000h,074h,092h,000h\nDB\t000h,064h,093h,000h\nDB\t000h,001h,091h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_ctx_inverse_mod_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/div3w-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|div_3_limbs|[FUNC]\n\tALIGN\t32\n|div_3_limbs| PROC\n\thint\t#34\n\tldp\tx4,x5,[x0]\n\teor\tx0,x0,x0\n\tmov\tx3,#64\n\tnop\n\n|$Loop|\n\tsubs\tx6,x4,x1\n\tadd\tx0,x0,x0\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\n\tcsello\tx4,x4,x6\n\textr\tx1,x2,x1,#1\n\tcsello\tx5,x5,x7\n\tlsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\n\tsub\tx3,x3,#1\n\tcbnz\tx3,|$Loop|\n\n\tasr\tx3,x0,#63\n\tadd\tx0,x0,x0\n\tsubs\tx6,x4,x1\n\tadd\tx0,x0,#1\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\n\n\torr\tx0,x0,x3\n\n\tret\n\tENDP\n\n\n\tEXPORT\t|quot_rem_128|[FUNC]\n\tALIGN\t32\n|quot_rem_128| PROC\n\thint\t#34\n\tldp\tx3,x4,[x1]\n\n\tmul\tx5,x3,x2\n\tumulh\tx6,x3,x2\n\tmul\tx11,  x4,x2\n\tumulh\tx7,x4,x2\n\n\tldp\tx8,x9,[x0]\n\tldr\tx10,[x0,#16]\n\n\tadds\tx6,x6,x11\n\tadc\tx7,x7,xzr\n\n\tsubs\tx8,x8,x5\n\tsbcs\tx9,x9,x6\n\tsbcs\tx10,x10,x7\n\tsbc\tx5,xzr,xzr\n\n\tadd\tx2,x2,x5\n\tand\tx3,x3,x5\n\tand\tx4,x4,x5\n\tadds\tx8,x8,x3\n\tadc\tx9,x9,x4\n\n\tstp\tx8,x9,[x0]\n\tstr\tx2,[x0,#16]\n\n\tmov\tx0,x2\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|quot_rem_64|[FUNC]\n\tALIGN\t32\n|quot_rem_64| PROC\n\thint\t#34\n\tldr\tx3,[x1]\n\tldr\tx8,[x0]\n\n\tmul\tx5,x3,x2\n\n\tsub\tx8,x8,x5\n\n\tstp\tx8,x2,[x0]\n\n\tmov\tx0,x2\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/div3w-x86_64.asm",
    "content": "OPTION\tDOTNAME\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tdiv_3_limbs\n\n\nALIGN\t32\ndiv_3_limbs\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_div_3_limbs::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n$L$SEH_body_div_3_limbs::\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\txor\trax,rax\n\tmov\tecx,64\n\n$L$oop::\n\tmov\tr10,r8\n\tsub\tr8,rsi\n\tmov\tr11,r9\n\tsbb\tr9,rdx\n\tlea\trax,QWORD PTR[1+rax*1+rax]\n\tmov\trdi,rdx\n\tcmovc\tr8,r10\n\tcmovc\tr9,r11\n\tsbb\trax,0\n\tshl\trdi,63\n\tshr\trsi,1\n\tshr\trdx,1\n\tor\trsi,rdi\n\tsub\tecx,1\n\tjnz\t$L$oop\n\n\tlea\trcx,QWORD PTR[1+rax*1+rax]\n\tsar\trax,63\n\n\tsub\tr8,rsi\n\tsbb\tr9,rdx\n\tsbb\trcx,0\n\n\tor\trax,rcx\n\n$L$SEH_epilogue_div_3_limbs::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_div_3_limbs::\ndiv_3_limbs\tENDP\nPUBLIC\tquot_rem_128\n\n\nALIGN\t32\nquot_rem_128\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_quot_rem_128::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n$L$SEH_body_quot_rem_128::\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trax,rdx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[rsi]\n\tmov\tr8,rax\n\tmov\trax,rcx\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr9,rax\n\tadc\trdx,0\n\n\tmov\tr10,QWORD PTR[rdi]\n\tmov\tr11,QWORD PTR[8+rdi]\n\tmov\trax,QWORD PTR[16+rdi]\n\n\tsub\tr10,r8\n\tsbb\tr11,r9\n\tsbb\trax,rdx\n\tsbb\tr8,r8\n\n\tadd\trcx,r8\n\tmov\tr9,r8\n\tand\tr8,QWORD PTR[rsi]\n\tand\tr9,QWORD PTR[8+rsi]\n\tadd\tr10,r8\n\tadc\tr11,r9\n\n\tmov\tQWORD PTR[rdi],r10\n\tmov\tQWORD PTR[8+rdi],r11\n\tmov\tQWORD 
PTR[16+rdi],rcx\n\n\tmov\trax,rcx\n\n$L$SEH_epilogue_quot_rem_128::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_quot_rem_128::\nquot_rem_128\tENDP\n\n\n\n\n\nPUBLIC\tquot_rem_64\n\n\nALIGN\t32\nquot_rem_64\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_quot_rem_64::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n$L$SEH_body_quot_rem_64::\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trax,rdx\n\timul\trdx,QWORD PTR[rsi]\n\n\tmov\tr10,QWORD PTR[rdi]\n\n\tsub\tr10,rdx\n\n\tmov\tQWORD PTR[rdi],r10\n\tmov\tQWORD PTR[8+rdi],rax\n\n$L$SEH_epilogue_quot_rem_64::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_quot_rem_64::\nquot_rem_64\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_div_3_limbs\n\tDD\timagerel $L$SEH_body_div_3_limbs\n\tDD\timagerel $L$SEH_info_div_3_limbs_prologue\n\n\tDD\timagerel $L$SEH_body_div_3_limbs\n\tDD\timagerel $L$SEH_epilogue_div_3_limbs\n\tDD\timagerel $L$SEH_info_div_3_limbs_body\n\n\tDD\timagerel $L$SEH_epilogue_div_3_limbs\n\tDD\timagerel $L$SEH_end_div_3_limbs\n\tDD\timagerel $L$SEH_info_div_3_limbs_epilogue\n\n\tDD\timagerel $L$SEH_begin_quot_rem_128\n\tDD\timagerel $L$SEH_body_quot_rem_128\n\tDD\timagerel $L$SEH_info_quot_rem_128_prologue\n\n\tDD\timagerel $L$SEH_body_quot_rem_128\n\tDD\timagerel $L$SEH_epilogue_quot_rem_128\n\tDD\timagerel $L$SEH_info_quot_rem_128_body\n\n\tDD\timagerel $L$SEH_epilogue_quot_rem_128\n\tDD\timagerel $L$SEH_end_quot_rem_128\n\tDD\timagerel $L$SEH_info_quot_rem_128_epilogue\n\n\tDD\timagerel $L$SEH_begin_quot_rem_64\n\tDD\timagerel 
$L$SEH_body_quot_rem_64\n\tDD\timagerel $L$SEH_info_quot_rem_64_prologue\n\n\tDD\timagerel $L$SEH_body_quot_rem_64\n\tDD\timagerel $L$SEH_epilogue_quot_rem_64\n\tDD\timagerel $L$SEH_info_quot_rem_64_body\n\n\tDD\timagerel $L$SEH_epilogue_quot_rem_64\n\tDD\timagerel $L$SEH_end_quot_rem_64\n\tDD\timagerel $L$SEH_info_quot_rem_64_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_div_3_limbs_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_div_3_limbs_body::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_div_3_limbs_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_quot_rem_128_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_quot_rem_128_body::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_quot_rem_128_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_quot_rem_64_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_quot_rem_64_body::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_quot_rem_64_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/dll.c",
    "content": "#include <windows.h>\n\n#if defined(_MSC_VER)\n/*\n * Even though we don't have memcpy/memset anywhere, MSVC compiler\n * generates calls to them as it recognizes corresponding patterns.\n */\nvoid *memcpy(unsigned char *dst, const unsigned char *src, size_t n)\n{\n    void *ret = dst;\n\n    while(n--)\n        *dst++ = *src++;\n\n    return ret;\n}\n\nvoid *memset(unsigned char *dst, int c, size_t n)\n{\n    void *ret = dst;\n\n    while(n--)\n        *dst++ = (unsigned char)c;\n\n    return ret;\n}\n#elif defined(__GNUC__)\n# pragma GCC diagnostic ignored \"-Wunused-parameter\"\n#endif\n\nBOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)\n{   return TRUE;   }\n"
  },
  {
    "path": "build/win64/mul_mont_256-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|mul_mont_sparse_256|[FUNC]\n\tALIGN\t32\n|mul_mont_sparse_256| PROC\n\thint\t#34\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx10,x11,[x1]\n\tldr\tx9,        [x2]\n\tldp\tx12,x13,[x1,#16]\n\n\tmul\tx19,x10,x9\n\tldp\tx5,x6,[x3]\n\tmul\tx20,x11,x9\n\tldp\tx7,x8,[x3,#16]\n\tmul\tx21,x12,x9\n\tmul\tx22,x13,x9\n\n\tumulh\tx14,x10,x9\n\tumulh\tx15,x11,x9\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tumulh\tx17,x13,x9\n\tadds\tx20,x20,x14\n\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,xzr,    x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*1]\n\tsubs\txzr,x19,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*2]\n\tsubs\txzr,x19,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\
n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tldr\tx9,[x2,8*3]\n\tsubs\txzr,x19,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tmul\tx14,x10,x9\n\tadcs\tx20,x21,x15\n\tmul\tx15,x11,x9\n\tadcs\tx21,x22,x16\n\tmul\tx16,x12,x9\n\tadcs\tx22,x23,x17\n\tmul\tx17,x13,x9\n\tadc\tx23,xzr,xzr\n\n\tadds\tx19,x19,x14\n\tumulh\tx14,x10,x9\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x11,x9\n\tadcs\tx21,x21,x16\n\tmul\tx3,x4,x19\n\tumulh\tx16,x12,x9\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x13,x9\n\tadc\tx23,x23,xzr\n\n\tadds\tx20,x20,x14\n\n\tadcs\tx21,x21,x15\n\tmul\tx15,x6,x3\n\tadcs\tx22,x22,x16\n\tmul\tx16,x7,x3\n\tadc\tx23,x23,x17\n\tmul\tx17,x8,x3\n\tsubs\txzr,x19,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx20,x20,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx21,x21,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx22,x22,x17\n\tumulh\tx17,x8,x3\n\tadc\tx23,x23,xzr\n\n\tadds\tx19,x20,x14\n\tadcs\tx20,x21,x15\n\tadcs\tx21,x22,x16\n\tadcs\tx22,x23,x17\n\tadc\tx23,xzr,xzr\n\n\tsubs\tx14,x19,x5\n\tsbcs\tx15,x20,x6\n\tsbcs\tx16,x21,x7\n\tsbcs\tx17,x22,x8\n\tsbcs\txzr,    x23,xzr\n\n\tcsello\tx19,x19,x14\n\tcsello\tx20,x20,x15\n\tcsello\tx21,x21,x16\n\tcsello\tx22,x22,x17\n\n\tstp\tx19,x20,[x0]\n\tstp\tx21,x22,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\tret\n\tENDP\n\n\n\tEXPORT\t|sqr_mont_sparse_256|[FUNC]\n\tALIGN\t32\n|sqr_mont_sparse_256| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-6*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tmov\tx4,x3\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\tmul\tx11,x6,x5\n\tumulh\tx15,x6,x5\n\tmul\tx12,x7,x5\n\tumulh\tx16,x7,x5\n\tmul\tx13,x8,x5\n\tumulh\tx19,x8,x5\n\n\tadds\tx12,x12,x15\n\tmul\tx14,x7,x6\n\tumulh\tx15,x7,x6\n\tadcs\tx13,x13,x16\n\tmul\tx16,x8,x6\n\tumulh\tx17,x8,x6\n\tadc\tx19,x19,xzr\n\n\tmul\tx20,x8,x7\n\tumulh\tx21,x8,x7\n\n\tadds\tx15,x15,x16\n\tmul\tx10,x5,x5\n\tadc\tx16,x17,xzr\n\n\tadds\tx13,x13,x14\n\tumulh\tx5,x5,x5\n\tadcs\tx19,x19,x15\n\tmul\tx15,x6,x6\n\tadcs\tx20,x20,x16\n\tumulh\tx6,x6,x6\n\tadc\tx21,x21,xzr\n\n\tadds\tx11,x11,x11\n\tmul\tx16,x7,x7\n\tadcs\tx12,x12,x12\n\tumulh\tx7,x7,x7\n\tadcs\tx13,x13,x13\n\tmul\tx17,x8,x8\n\tadcs\tx19,x19,x19\n\tumulh\tx8,x8,x8\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadc\tx22,xzr,xzr\n\n\tadds\tx11,x11,x5\n\tadcs\tx12,x12,x15\n\tadcs\tx13,x13,x6\n\tadcs\tx19,x19,x16\n\tadcs\tx20,x20,x7\n\tadcs\tx21,x21,x17\n\tadc\tx22,x22,x8\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadds\tx10,x10,x19\n\tadcs\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadc\tx19,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x19,xzr\n\n\tcsello\tx10,x10,x14\n\tcsello\tx11,x11,x15\n\tcsello\tx12,x12,x16\n\tcsello\tx13,x13,x17\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#6*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tEXPORT\t|from_mont_256|[FUNC]\n\tALIGN\t32\n|from_mont_256| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\n\tcsello\tx10,x10,x14\n\tcsello\tx11,x11,x15\n\tcsello\tx12,x12,x16\n\tcsello\tx13,x13,x17\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|redc_mont_256|[FUNC]\n\tALIGN\t32\n|redc_mont_256| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tmov\tx4,x3\n\tldp\tx10,x11,[x1]\n\tldp\tx12,x13,[x1,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx14,x15,[x1,#32]\n\tldp\tx16,x17,[x1,#48]\n\n\tadds\tx10,x10,x14\n\tadcs\tx11,x11,x15\n\tadcs\tx12,x12,x16\n\tadcs\tx13,x13,x17\n\tadc\tx9,xzr,xzr\n\n\tsubs\tx14,x10,x5\n\tsbcs\tx15,x11,x6\n\tsbcs\tx16,x12,x7\n\tsbcs\tx17,x13,x8\n\tsbcs\txzr,    x9,xzr\n\n\tcsello\tx10,x10,x14\n\tcsello\tx11,x11,x15\n\tcsello\tx12,x12,x16\n\tcsello\tx13,x13,x17\n\n\tstp\tx10,x11,[x0]\n\tstp\tx12,x13,[x0,#16]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__mul_by_1_mont_256| 
PROC\n\tmul\tx3,x4,x10\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tmul\tx3,x4,x10\n\tadc\tx13,x9,x17\n\n\tmul\tx15,x6,x3\n\tmul\tx16,x7,x3\n\tmul\tx17,x8,x3\n\tsubs\txzr,x10,#1\n\tumulh\tx14,x5,x3\n\tadcs\tx11,x11,x15\n\tumulh\tx15,x6,x3\n\tadcs\tx12,x12,x16\n\tumulh\tx16,x7,x3\n\tadcs\tx13,x13,x17\n\tumulh\tx17,x8,x3\n\tadc\tx9,xzr,xzr\n\n\tadds\tx10,x11,x14\n\tadcs\tx11,x12,x15\n\tadcs\tx12,x13,x16\n\tadc\tx13,x9,x17\n\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/mul_mont_384-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\n\n\tEXPORT\t|add_mod_384x384|[FUNC]\n\tALIGN\t32\n|add_mod_384x384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__add_mod_384x384| PROC\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tadcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tadcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tadcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#80]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsello\tx11,x11,x19\n\tcsello\tx12,x12,x20\n\tcsello\tx13,x13,x21\n\tcsello\tx14,x14,x22\n\tstp\tx11,x12,[x0,#48]\n\tcsello\tx15,x15,x23\n\tstp\tx13,x14,[x0,#64]\n\tcsello\tx16,x16,x24\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sub_mod_384x384|[FUNC]\n\tALIGN\t32\n|sub_mod_384x384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-8*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#8*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__sub_mod_384x384| PROC\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tstp\tx11,  x12,  [x0]\n\tsbcs\tx15,x15,x23\n\tldp\tx11,  x12,  [x1,#48]\n\tsbcs\tx16,x16,x24\n\n\tldp\tx19,x20,[x2,#48]\n\tstp\tx13,  x14,  [x0,#16]\n\tldp\tx13,  x14,  [x1,#64]\n\tldp\tx21,x22,[x2,#64]\n\n\tsbcs\tx11,x11,x19\n\tstp\tx15,  x16,  [x0,#32]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#80]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#80]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__add_mod_384| PROC\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tadds\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tadcs\tx12,x12,x20\n\tldp\tx15,  x16,  
[x1,#32]\n\tadcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x17,xzr\n\n\tcsello\tx11,x11,x19\n\tcsello\tx12,x12,x20\n\tcsello\tx13,x13,x21\n\tcsello\tx14,x14,x22\n\tcsello\tx15,x15,x23\n\tstp\tx11,x12,[x0]\n\tcsello\tx16,x16,x24\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__sub_mod_384| PROC\n\tldp\tx11,  x12,  [x1]\n\tldp\tx19,x20,[x2]\n\tldp\tx13,  x14,  [x1,#16]\n\tsubs\tx11,x11,x19\n\tldp\tx21,x22,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tldp\tx15,  x16,  [x1,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx23,x24,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx17,xzr,xzr\n\n\tand\tx19,x5,x17\n\tand\tx20,x6,x17\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x17\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x17\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x17\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x17\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_mont_384x|[FUNC]\n\tALIGN\t32\n|mul_mont_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#288\n\n\tmov\tx26,x0\n\tmov\tx27,x1\n\tmov\tx28,x2\n\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\tadd\tx1,x1,#48\n\tadd\tx2,x2,#48\n\tadd\tx0,sp,#96\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tsub\tx2,x1,#48\n\tadd\tx0,sp,#240\n\tbl\t__add_mod_384\n\n\tadd\tx1,x28,#0\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#192\n\tbl\t__add_mod_384\n\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,x0\n\tadd\tx2,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,sp,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#96\n\tadd\tx0,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tadd\tx1,sp,#192\n\tadd\tx0,x0,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#288\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sqr_mont_384x|[FUNC]\n\tALIGN\t32\n|sqr_mont_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\n\tmov\tx4,x3\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tadd\tx0,sp,#0\n\tbl\t__add_mod_384\n\n\tadd\tx0,sp,#48\n\tbl\t__sub_mod_384\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__mul_mont_384\n\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsello\tx19,x11,x19\n\tcsello\tx20,x12,x20\n\tcsello\tx21,x13,x21\n\tldp\tx11,x12,[sp]\n\tcsello\tx22,x14,x22\n\tldr\tx17,        [sp,#48]\n\tcsello\tx23,x15,x23\n\tldp\tx13,x14,[sp,#16]\n\tcsello\tx24,x16,x24\n\tldp\tx15,x16,[sp,#32]\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_mont_384|[FUNC]\n\tALIGN\t32\n|mul_mont_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__mul_mont_384| PROC\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tmov\tx17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*1]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*2]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n
\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*3]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*4]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\t
x24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tadc\tx4,x17,xzr\n\tldr\tx17,[x2,8*5]\n\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,x4,xzr\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadcs\tx25,x25,xzr\n\tadc\tx17,xzr,xzr\n\n\tadds\tx20,x20,x26\n\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadcs\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadc\tx17,x17,xzr\n\tsubs\txzr,x19,#1\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,
x24,x3\n\tumulh\tx3,x10,x4\n\tadcs\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\n\tadc\tx17,x17,xzr\n\n\tadds\tx19,x20,x26\n\tadcs\tx20,x21,x27\n\tadcs\tx21,x22,x28\n\tadcs\tx22,x23,x0\n\tadcs\tx23,x24,x1\n\tadcs\tx24,x25,x3\n\tadc\tx25,x17,xzr\n\n\tsubs\tx26,x19,x5\n\tsbcs\tx27,x20,x6\n\tsbcs\tx28,x21,x7\n\tsbcs\tx0,x22,x8\n\tsbcs\tx1,x23,x9\n\tsbcs\tx3,x24,x10\n\tsbcs\txzr,    x25,xzr\n\n\tcsello\tx11,x19,x26\n\tcsello\tx12,x20,x27\n\tcsello\tx13,x21,x28\n\tcsello\tx14,x22,x0\n\tcsello\tx15,x23,x1\n\tcsello\tx16,x24,x3\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sqr_mont_384|[FUNC]\n\tALIGN\t32\n|sqr_mont_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\n\tmov\tx4,x3\n\n\tmov\tx3,x0\n\tmov\tx0,sp\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tmov\tx1,sp\n\tmov\tx0,x3\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sqr_n_mul_mont_383|[FUNC]\n\tALIGN\t32\n|sqr_n_mul_mont_383| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx4,x0,[sp,#12*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\n\tmov\tx17,x5\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\tmov\tx0,sp\n|$Loop_sqr_383|\n\tbl\t__sqr_384\n\tsub\tx2,x2,#1\n\n\tldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tmov\tx1,sp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tcbnz\tx2,|$Loop_sqr_383|\n\n\tmov\tx2,x17\n\tldr\tx17,[x17]\n\tbl\t__mul_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\tALIGN\t32\n|__sqr_384| PROC\n\tmul\tx19,x12,x11\n\tmul\tx20,x13,x11\n\tmul\tx21,x14,x11\n\tmul\tx22,x15,x11\n\tmul\tx23,x16,x11\n\n\tumulh\tx6,x12,x11\n\tumulh\tx7,x13,x11\n\tumulh\tx8,x14,x11\n\tumulh\tx9,x15,x11\n\tadds\tx20,x20,x6\n\tumulh\tx10,x16,x11\n\tadcs\tx21,x21,x7\n\tmul\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tmul\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tmul\tx9,x15,x12\n\tadc\tx24,xzr,    x10\n\tmul\tx10,x16,x12\n\n\tadds\tx21,x21,x7\n\tumulh\tx7,x13,x12\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x12\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x12\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x12\n\tadc\tx25,xzr,xzr\n\n\tmul\tx5,x11,x11\n\tadds\tx22,x22,x7\n\tumulh\tx11,  
x11,x11\n\tadcs\tx23,x23,x8\n\tmul\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tmul\tx9,x15,x13\n\tadc\tx25,x25,x10\n\tmul\tx10,x16,x13\n\n\tadds\tx23,x23,x8\n\tumulh\tx8,x14,x13\n\tadcs\tx24,x24,x9\n\tumulh\tx9,x15,x13\n\tadcs\tx25,x25,x10\n\tumulh\tx10,x16,x13\n\tadc\tx26,xzr,xzr\n\n\tmul\tx6,x12,x12\n\tadds\tx24,x24,x8\n\tumulh\tx12,  x12,x12\n\tadcs\tx25,x25,x9\n\tmul\tx9,x15,x14\n\tadc\tx26,x26,x10\n\tmul\tx10,x16,x14\n\n\tadds\tx25,x25,x9\n\tumulh\tx9,x15,x14\n\tadcs\tx26,x26,x10\n\tumulh\tx10,x16,x14\n\tadc\tx27,xzr,xzr\n\tmul\tx7,x13,x13\n\tadds\tx26,x26,x9\n\tumulh\tx13,  x13,x13\n\tadc\tx27,x27,x10\n\tmul\tx8,x14,x14\n\n\tmul\tx10,x16,x15\n\tumulh\tx14,  x14,x14\n\tadds\tx27,x27,x10\n\tumulh\tx10,x16,x15\n\tmul\tx9,x15,x15\n\tadc\tx28,x10,xzr\n\n\tadds\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tadcs\tx21,x21,x21\n\tadcs\tx22,x22,x22\n\tadcs\tx23,x23,x23\n\tadcs\tx24,x24,x24\n\tadcs\tx25,x25,x25\n\tadcs\tx26,x26,x26\n\tumulh\tx15,  x15,x15\n\tadcs\tx27,x27,x27\n\tmul\tx10,x16,x16\n\tadcs\tx28,x28,x28\n\tumulh\tx16,  x16,x16\n\tadc\tx1,xzr,xzr\n\n\tadds\tx19,x19,x11\n\tadcs\tx20,x20,x6\n\tadcs\tx21,x21,x12\n\tadcs\tx22,x22,x7\n\tadcs\tx23,x23,x13\n\tadcs\tx24,x24,x8\n\tadcs\tx25,x25,x14\n\tstp\tx5,x19,[x0]\n\tadcs\tx26,x26,x9\n\tstp\tx20,x21,[x0,#16]\n\tadcs\tx27,x27,x15\n\tstp\tx22,x23,[x0,#32]\n\tadcs\tx28,x28,x10\n\tstp\tx24,x25,[x0,#48]\n\tadc\tx16,x16,x1\n\tstp\tx26,x27,[x0,#64]\n\tstp\tx28,x16,[x0,#80]\n\n\tret\n\tENDP\n\n\n\tEXPORT\t|sqr_384|[FUNC]\n\tALIGN\t32\n|sqr_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tbl\t__sqr_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|redc_mont_384|[FUNC]\n\tALIGN\t32\n|redc_mont_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|from_mont_384|[FUNC]\n\tALIGN\t32\n|from_mont_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tmov\tx4,x3\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\n\tcsello\tx11,x11,x19\n\tcsello\tx12,x12,x20\n\tcsello\tx13,x13,x21\n\tcsello\tx14,x14,x22\n\tcsello\tx15,x15,x23\n\tcsello\tx16,x16,x24\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__mul_by_1_mont_384| 
PROC\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tmul\tx26,x4,x11\n\tldp\tx15,x16,[x1,#32]\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tad
c\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tmul\tx26,x4,x11\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\n\tmul\tx20,x6,x26\n\tmul\tx21,x7,x26\n\tmul\tx22,x8,x26\n\tmul\tx23,x9,x26\n\tmul\tx24,x10,x26\n\tsubs\txzr,x11,#1\n\tumulh\tx11,x5,x26\n\tadcs\tx20,x20,x12\n\tumulh\tx12,x6,x26\n\tadcs\tx21,x21,x13\n\tumulh\tx13,x7,x26\n\tadcs\tx22,x22,x14\n\tumulh\tx14,x8,x26\n\tadcs\tx23,x23,x15\n\tumulh\tx15,x9,x26\n\tadcs\tx24,x24,x16\n\tumulh\tx16,x10,x26\n\tadc\tx25,xzr,xzr\n\tadds\tx11,x11,x20\n\tadcs\tx12,x12,x21\n\tadcs\tx13,x13,x22\n\tadcs\tx14,x14,x23\n\tadcs\tx15,x15,x24\n\tadc\tx16,x16,x25\n\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__redc_tail_mont_384| PROC\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadcs\tx16,x16,x24\n\tadc\tx25,xzr,xzr\n\n\tsubs\tx19,x11,x5\n\tsbcs\tx20,x12,x6\n\tsbcs\tx21,x13,x7\n\tsbcs\tx22,x14,x8\n\tsbcs\tx23,x15,x9\n\tsbcs\tx24,x16,x10\n\tsbcs\txzr,x25,xzr\n\n\tcsello\tx11,x11,x19\n\tcsello\tx12,x12,x20\n\tcsello\tx13,x13,x21\n\tcsello\tx14,x14,x22\n\tcsello\tx15,x15,x23\n\tcsello\tx16,x16,x24\n\n\tstp\tx11,x12,[x0]\n\tstp\tx13,x14,[x0,#16]\n\tstp\tx15,x16,[x0,#32]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_384|[FUNC]\n\tALIGN\t32\n|mul_384| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__mul_384| PROC\n\tldp\tx11,x12,[x1]\n\tldr\tx17,        [x2]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\n\tumulh\tx5,x11,x17\n\tumulh\tx6,x12,x17\n\tumulh\tx7,x13,x17\n\tumulh\tx8,x14,x17\n\tumulh\tx9,x15,x17\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,8*1]\n\n\tstr\tx19,[x0]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,xzr,    
x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(1+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*1]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(2+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*2]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(3+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*3]\n\tadds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tldr\tx17,[x2,#8*(4+1)]\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*4]\n\t
adds\tx19,x20,x5\n\tmul\tx5,x11,x17\n\tadcs\tx20,x21,x6\n\tmul\tx6,x12,x17\n\tadcs\tx21,x22,x7\n\tmul\tx7,x13,x17\n\tadcs\tx22,x23,x8\n\tmul\tx8,x14,x17\n\tadcs\tx23,x24,x9\n\tmul\tx9,x15,x17\n\tadc\tx24,x25,x10\n\tmul\tx10,x16,x17\n\tadds\tx19,x19,x5\n\tumulh\tx5,x11,x17\n\tadcs\tx20,x20,x6\n\tumulh\tx6,x12,x17\n\tadcs\tx21,x21,x7\n\tumulh\tx7,x13,x17\n\tadcs\tx22,x22,x8\n\tumulh\tx8,x14,x17\n\tadcs\tx23,x23,x9\n\tumulh\tx9,x15,x17\n\tadcs\tx24,x24,x10\n\tumulh\tx10,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tstr\tx19,[x0,8*5]\n\tadds\tx19,x20,x5\n\tadcs\tx20,x21,x6\n\tadcs\tx21,x22,x7\n\tadcs\tx22,x23,x8\n\tadcs\tx23,x24,x9\n\tadc\tx24,x25,x10\n\n\tstp\tx19,x20,[x0,#48]\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|mul_382x|[FUNC]\n\tALIGN\t32\n|mul_382x| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#96\n\n\tldp\tx11,x12,[x1]\n\tmov\tx26,x0\n\tldp\tx19,x20,[x1,#48]\n\tmov\tx27,x1\n\tldp\tx13,x14,[x1,#16]\n\tmov\tx28,x2\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx15,x16,[x1,#32]\n\tadds\tx5,x11,x19\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx6,x12,x20\n\tldp\tx11,x12,[x2]\n\tadcs\tx7,x13,x21\n\tldp\tx19,x20,[x2,#48]\n\tadcs\tx8,x14,x22\n\tldp\tx13,x14,[x2,#16]\n\tadcs\tx9,x15,x23\n\tldp\tx21,x22,[x2,#64]\n\tadc\tx10,x16,x24\n\tldp\tx15,x16,[x2,#32]\n\n\tstp\tx5,x6,[sp]\n\tadds\tx5,x11,x19\n\tldp\tx23,x24,[x2,#80]\n\tadcs\tx6,x12,x20\n\tstp\tx7,x8,[sp,#16]\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tstp\tx9,x10,[sp,#32]\n\tadcs\tx9,x15,x23\n\tstp\tx5,x6,[sp,#48]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[sp,#64]\n\tstp\tx9,x10,[sp,#80]\n\n\tbl\t__mul_384\n\n\tadd\tx1,sp,#0\n\tadd\tx2,sp,#48\n\tadd\tx0,x26,#96\n\tbl\t__mul_384\n\n\tadd\tx1,x27,#48\n\tadd\tx2,x28,#48\n\tadd\tx0,sp,#0\n\tbl\t__mul_384\n\n\t
ldp\tx5,x6,[x3]\n\tldp\tx7,x8,[x3,#16]\n\tldp\tx9,x10,[x3,#32]\n\n\tadd\tx1,x26,#96\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#96\n\tbl\t__sub_mod_384x384\n\n\tadd\tx2,x26,#0\n\tbl\t__sub_mod_384x384\n\n\tadd\tx1,x26,#0\n\tadd\tx2,sp,#0\n\tadd\tx0,x26,#0\n\tbl\t__sub_mod_384x384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tadd\tsp,sp,#96\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sqr_382x|[FUNC]\n\tALIGN\t32\n|sqr_382x| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tldp\tx11,x12,[x1]\n\tldp\tx19,x20,[x1,#48]\n\tldp\tx13,x14,[x1,#16]\n\tadds\tx5,x11,x19\n\tldp\tx21,x22,[x1,#64]\n\tadcs\tx6,x12,x20\n\tldp\tx15,x16,[x1,#32]\n\tadcs\tx7,x13,x21\n\tldp\tx23,x24,[x1,#80]\n\tadcs\tx8,x14,x22\n\tstp\tx5,x6,[x0]\n\tadcs\tx9,x15,x23\n\tldp\tx5,x6,[x2]\n\tadc\tx10,x16,x24\n\tstp\tx7,x8,[x0,#16]\n\n\tsubs\tx11,x11,x19\n\tldp\tx7,x8,[x2,#16]\n\tsbcs\tx12,x12,x20\n\tstp\tx9,x10,[x0,#32]\n\tsbcs\tx13,x13,x21\n\tldp\tx9,x10,[x2,#32]\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tadds\tx11,x11,x19\n\tand\tx21,x7,x25\n\tadcs\tx12,x12,x20\n\tand\tx22,x8,x25\n\tadcs\tx13,x13,x21\n\tand\tx23,x9,x25\n\tadcs\tx14,x14,x22\n\tand\tx24,x10,x25\n\tadcs\tx15,x15,x23\n\tstp\tx11,x12,[x0,#48]\n\tadc\tx16,x16,x24\n\tstp\tx13,x14,[x0,#64]\n\tstp\tx15,x16,[x0,#80]\n\n\tmov\tx4,x1\n\tadd\tx1,x0,#0\n\tadd\tx2,x0,#48\n\tbl\t__mul_384\n\n\tadd\tx1,x4,#0\n\tadd\tx2,x4,#48\n\tadd\tx0,x0,#96\n\tbl\t__mul_384\n\tldr\tx30,[x29,#__S
IZEOF_POINTER__]\n\n\tldp\tx11,x12,[x0]\n\tldp\tx13,x14,[x0,#16]\n\tadds\tx11,x11,x11\n\tldp\tx15,x16,[x0,#32]\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadcs\tx19,x19,x19\n\tadcs\tx20,x20,x20\n\tstp\tx11,x12,[x0]\n\tadcs\tx21,x21,x21\n\tstp\tx13,x14,[x0,#16]\n\tadcs\tx22,x22,x22\n\tstp\tx15,x16,[x0,#32]\n\tadcs\tx23,x23,x23\n\tstp\tx19,x20,[x0,#48]\n\tadc\tx24,x24,x24\n\tstp\tx21,x22,[x0,#64]\n\tstp\tx23,x24,[x0,#80]\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sqr_mont_382x|[FUNC]\n\tALIGN\t32\n|sqr_mont_382x| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\tstp\tx3,x0,[sp,#12*__SIZEOF_POINTER__]\n\tsub\tsp,sp,#112\n\tmov\tx4,x3\n\n\tldp\tx11,x12,[x1]\n\tldp\tx13,x14,[x1,#16]\n\tldp\tx15,x16,[x1,#32]\n\n\tldp\tx17,x20,[x1,#48]\n\tldp\tx21,x22,[x1,#64]\n\tldp\tx23,x24,[x1,#80]\n\n\tadds\tx5,x11,x17\n\tadcs\tx6,x12,x20\n\tadcs\tx7,x13,x21\n\tadcs\tx8,x14,x22\n\tadcs\tx9,x15,x23\n\tadc\tx10,x16,x24\n\n\tsubs\tx19,x11,x17\n\tsbcs\tx20,x12,x20\n\tsbcs\tx21,x13,x21\n\tsbcs\tx22,x14,x22\n\tsbcs\tx23,x15,x23\n\tsbcs\tx24,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tstp\tx5,x6,[sp]\n\tstp\tx7,x8,[sp,#16]\n\tstp\tx9,x10,[sp,#32]\n\tstp\tx19,x20,[sp,#48]\n\tstp\tx21,x22,[sp,#64]\n\tstp\tx23,x24,[sp,#80]\n\tstr\tx25,[sp,#96]\n\n\tldp\tx5,x6,[x2]\n\tldp\tx7,x8,[x2,#16]\n\tldp\tx9,x10,[x2,#32]\n\n\tadd\tx2,x1,#48\n\tbl\t__mul_mont_383_nonred\n\n\tadds\tx19,x11,x11\n\tadcs\tx20,x12,x12\n\tadcs\tx21,x13,x13\n\tadcs\tx22,x
14,x14\n\tadcs\tx23,x15,x15\n\tadc\tx24,x16,x16\n\n\tstp\tx19,x20,[x2,#48]\n\tstp\tx21,x22,[x2,#64]\n\tstp\tx23,x24,[x2,#80]\n\n\tldp\tx11,x12,[sp]\n\tldr\tx17,[sp,#48]\n\tldp\tx13,x14,[sp,#16]\n\tldp\tx15,x16,[sp,#32]\n\n\tadd\tx2,sp,#48\n\tbl\t__mul_mont_383_nonred\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tldr\tx25,[sp,#96]\n\tldp\tx19,x20,[sp]\n\tldp\tx21,x22,[sp,#16]\n\tldp\tx23,x24,[sp,#32]\n\n\tand\tx19,x19,x25\n\tand\tx20,x20,x25\n\tand\tx21,x21,x25\n\tand\tx22,x22,x25\n\tand\tx23,x23,x25\n\tand\tx24,x24,x25\n\n\tsubs\tx11,x11,x19\n\tsbcs\tx12,x12,x20\n\tsbcs\tx13,x13,x21\n\tsbcs\tx14,x14,x22\n\tsbcs\tx15,x15,x23\n\tsbcs\tx16,x16,x24\n\tsbc\tx25,xzr,xzr\n\n\tand\tx19,x5,x25\n\tand\tx20,x6,x25\n\tand\tx21,x7,x25\n\tand\tx22,x8,x25\n\tand\tx23,x9,x25\n\tand\tx24,x10,x25\n\n\tadds\tx11,x11,x19\n\tadcs\tx12,x12,x20\n\tadcs\tx13,x13,x21\n\tadcs\tx14,x14,x22\n\tadcs\tx15,x15,x23\n\tadc\tx16,x16,x24\n\n\tstp\tx11,x12,[x2]\n\tstp\tx13,x14,[x2,#16]\n\tstp\tx15,x16,[x2,#32]\n\n\tadd\tsp,sp,#112\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\tALIGN\t32\n|__mul_mont_383_nonred| PROC\n\tmul\tx19,x11,x17\n\tmul\tx20,x12,x17\n\tmul\tx21,x13,x17\n\tmul\tx22,x14,x17\n\tmul\tx23,x15,x17\n\tmul\tx24,x16,x17\n\tmul\tx4,x4,x19\n\n\tumulh\tx26,x11,x17\n\tumulh\tx27,x12,x17\n\tumulh\tx28,x13,x17\n\tumulh\tx0,x14,x17\n\tumulh\tx1,x15,x17\n\tumulh\tx3,x16,x17\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,xzr,    
x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*1]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*2]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,x
zr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*3]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*4]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx
27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tldr\tx17,[x2,8*5]\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\n\tldr\tx4,[x29,#12*__SIZEOF_POINTER__]\n\tadds\tx19,x20,x26\n\tmul\tx26,x11,x17\n\tadcs\tx20,x21,x27\n\tmul\tx27,x12,x17\n\tadcs\tx21,x22,x28\n\tmul\tx28,x13,x17\n\tadcs\tx22,x23,x0\n\tmul\tx0,x14,x17\n\tadcs\tx23,x24,x1\n\tmul\tx1,x15,x17\n\tadcs\tx24,x25,x3\n\tmul\tx3,x16,x17\n\tadc\tx25,xzr,xzr\n\n\tadds\tx19,x19,x26\n\tumulh\tx26,x11,x17\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x12,x17\n\tadcs\tx21,x21,x28\n\tmul\tx4,x4,x19\n\tumulh\tx28,x13,x17\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x14,x17\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x15,x17\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x16,x17\n\tadc\tx25,x25,xzr\n\n\tadds\tx20,x20,x26\n\tmul\tx26,x5,x4\n\tadcs\tx21,x21,x27\n\tmul\tx27,x6,x4\n\tadcs\tx22,x22,x28\n\tmul\tx28,x7,x4\n\tadcs\tx23,x23,x0\n\tmul\tx0,x8,x4\n\tadcs\tx24,x24,x1\n\tmul\tx1,x9,x4\n\tadc\tx25,x25,x3\n\tmul\tx3,x10,x4\n\tadds\tx19,x19,x26\n\tumulh\tx26,x5,x4\n\tadcs\tx20,x20,x27\n\tumulh\tx27,x6,x4\n\tadcs\tx21,x21,x28\n\tumulh\tx28,x7,x4\n\tadcs\tx22,x22,x0\n\tumulh\tx0,x8,x4\n\tadcs\tx23,x23,x1\n\tumulh\tx1,x9,x4\n\tadcs\tx24,x24,x3\n\tumulh\tx3,x10,x4\n\tadc\tx25,x25,xzr\n\tldp\tx4,x2,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\tx11,x20,x26\n\tadcs\tx12,x21,x27\n\tadcs\tx13,x22,x28\n\tadcs\tx14,x23,x0\n\tadcs\tx15,x24,x1\n\tadcs\tx16,x25,x3\n\n\tret\n\tENDP\n\n
\n\n\tEXPORT\t|sgn0_pty_mont_384|[FUNC]\n\tALIGN\t32\n|sgn0_pty_mont_384| PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\tadds\tx11,x11,x11\n\tadcs\tx12,x12,x12\n\tadcs\tx13,x13,x13\n\tadcs\tx14,x14,x14\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|sgn0_pty_mont_384x|[FUNC]\n\tALIGN\t32\n|sgn0_pty_mont_384x| 
PROC\n\thint\t#25\n\tstp\tx29,x30,[sp,#-16*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\tstp\tx19,x20,[sp,#2*__SIZEOF_POINTER__]\n\tstp\tx21,x22,[sp,#4*__SIZEOF_POINTER__]\n\tstp\tx23,x24,[sp,#6*__SIZEOF_POINTER__]\n\tstp\tx25,x26,[sp,#8*__SIZEOF_POINTER__]\n\tstp\tx27,x28,[sp,#10*__SIZEOF_POINTER__]\n\n\tmov\tx4,x2\n\tldp\tx5,x6,[x1]\n\tldp\tx7,x8,[x1,#16]\n\tldp\tx9,x10,[x1,#32]\n\tmov\tx1,x0\n\n\tbl\t__mul_by_1_mont_384\n\tadd\tx1,x1,#48\n\n\tand\tx2,x11,#1\n\torr\tx3,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx3,x3,x13\n\tadcs\tx12,x12,x12\n\torr\tx3,x3,x14\n\tadcs\tx13,x13,x13\n\torr\tx3,x3,x15\n\tadcs\tx14,x14,x14\n\torr\tx3,x3,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx2,x2,x17\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tx30,[x29,#__SIZEOF_POINTER__]\n\n\tand\tx0,x11,#1\n\torr\tx1,x11,x12\n\tadds\tx11,x11,x11\n\torr\tx1,x1,x13\n\tadcs\tx12,x12,x12\n\torr\tx1,x1,x14\n\tadcs\tx13,x13,x13\n\torr\tx1,x1,x15\n\tadcs\tx14,x14,x14\n\torr\tx1,x1,x16\n\tadcs\tx15,x15,x15\n\tadcs\tx16,x16,x16\n\tadc\tx17,xzr,xzr\n\n\tsubs\tx11,x11,x5\n\tsbcs\tx12,x12,x6\n\tsbcs\tx13,x13,x7\n\tsbcs\tx14,x14,x8\n\tsbcs\tx15,x15,x9\n\tsbcs\tx16,x16,x10\n\tsbc\tx17,x17,xzr\n\n\tmvn\tx17,x17\n\tand\tx17,x17,#2\n\torr\tx0,x0,x17\n\n\tcmp\tx3,#0\n\tcseleq\tx3,x0,x2\n\n\tcmp\tx1,#0\n\tcselne\tx1,x0,x2\n\n\tand\tx3,x3,#1\n\tand\tx1,x1,#2\n\torr\tx0,x1,x3\n\n\tldp\tx19,x20,[x29,#2*__SIZEOF_POINTER__]\n\tldp\tx21,x22,[x29,#4*__SIZEOF_POINTER__]\n\tldp\tx23,x24,[x29,#6*__SIZEOF_POINTER__]\n\tldp\tx25,x26,[x29,#8*__SIZEOF_POINTER__]\n\tldp\tx27,x28,[x29,#10*__SIZEOF_POINTER__]\n\tldr\tx29,[sp],#16*__SIZEOF_POINTER__\n\thint\t#29\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/mulq_mont_256-x86_64.asm",
    "content": "OPTION\tDOTNAME\nEXTERN\tmul_mont_sparse_256$1:NEAR\nEXTERN\tsqr_mont_sparse_256$1:NEAR\nEXTERN\tfrom_mont_256$1:NEAR\nEXTERN\tredc_mont_256$1:NEAR\n_DATA\tSEGMENT\nCOMM\t__blst_platform_cap:DWORD:1\n_DATA\tENDS\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tmul_mont_sparse_256\n\n\nALIGN\t32\nmul_mont_sparse_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_mont_sparse_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tmul_mont_sparse_256$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_mul_mont_sparse_256::\n\n\n\tmov\trax,QWORD PTR[rdx]\n\tmov\tr13,QWORD PTR[rsi]\n\tmov\tr14,QWORD PTR[8+rsi]\n\tmov\tr12,QWORD PTR[16+rsi]\n\tmov\trbp,QWORD PTR[24+rsi]\n\tmov\trbx,rdx\n\n\tmov\tr15,rax\n\tmul\tr13\n\tmov\tr9,rax\n\tmov\trax,r15\n\tmov\tr10,rdx\n\tcall\t__mulq_mont_sparse_256\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mul_mont_sparse_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_mont_sparse_256::\nmul_mont_sparse_256\tENDP\n\nPUBLIC\tsqr_mont_sparse_256\n\n\nALIGN\t32\nsqr_mont_sparse_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_mont_sparse_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD 
PTR[__blst_platform_cap],1\n\tjnz\tsqr_mont_sparse_256$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_sqr_mont_sparse_256::\n\n\n\tmov\trax,QWORD PTR[rsi]\n\tmov\tr8,rcx\n\tmov\tr14,QWORD PTR[8+rsi]\n\tmov\trcx,rdx\n\tmov\tr12,QWORD PTR[16+rsi]\n\tlea\trbx,QWORD PTR[rsi]\n\tmov\trbp,QWORD PTR[24+rsi]\n\n\tmov\tr15,rax\n\tmul\trax\n\tmov\tr9,rax\n\tmov\trax,r15\n\tmov\tr10,rdx\n\tcall\t__mulq_mont_sparse_256\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sqr_mont_sparse_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_mont_sparse_256::\nsqr_mont_sparse_256\tENDP\n\nALIGN\t32\n__mulq_mont_sparse_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmul\tr14\n\tadd\tr10,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tr12\n\tadd\tr11,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[8+rbx]\n\tadc\trdx,0\n\txor\tr14,r14\n\tmov\tr13,rdx\n\n\tmov\trdi,r9\n\timul\tr9,r8\n\n\n\tmov\tr15,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr10,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr11,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tadd\tr11,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr12,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\tr14,rdx\n\txor\tr15,r15\n\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trdi,rax\n\tmov\trax,r9\n\tadc\trdi,rdx\n\n\tmul\tQWORD 
PTR[8+rcx]\n\tadd\tr10,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr10,rdi\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr11,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[16+rbx]\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tadd\tr13,rdx\n\tadc\tr14,0\n\tadc\tr15,0\n\tmov\trdi,r10\n\timul\tr10,r8\n\n\n\tmov\tr9,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr12,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\tr15,rdx\n\txor\tr9,r9\n\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trdi,rax\n\tmov\trax,r10\n\tadc\trdi,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,rdi\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr13,rax\n\tmov\trax,QWORD PTR[24+rbx]\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tadd\tr14,rdx\n\tadc\tr15,0\n\tadc\tr9,0\n\tmov\trdi,r11\n\timul\tr11,r8\n\n\n\tmov\tr10,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr13,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr15,rbp\n\tadc\tr9,rdx\n\txor\tr10,r10\n\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trdi,rax\n\tmov\trax,r11\n\tadc\trdi,rdx\n\n\tmul\tQWORD 
PTR[8+rcx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,rdi\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tadd\tr15,rdx\n\tadc\tr9,0\n\tadc\tr10,0\n\timul\trax,r8\n\tmov\trsi,QWORD PTR[8+rsp]\n\n\n\tmov\tr11,rax\n\tmul\tQWORD PTR[rcx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\tr12,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r12\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr14,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tmov\trbx,r14\n\tadd\tr15,rbp\n\tadc\trdx,0\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr9,rdx\n\tadc\tr10,0\n\n\n\n\n\tmov\tr12,r15\n\tsub\tr13,QWORD PTR[rcx]\n\tsbb\tr14,QWORD PTR[8+rcx]\n\tsbb\tr15,QWORD PTR[16+rcx]\n\tmov\trbp,r9\n\tsbb\tr9,QWORD PTR[24+rcx]\n\tsbb\tr10,0\n\n\tcmovc\tr13,rax\n\tcmovc\tr14,rbx\n\tcmovc\tr15,r12\n\tmov\tQWORD PTR[rsi],r13\n\tcmovc\tr9,rbp\n\tmov\tQWORD PTR[8+rsi],r14\n\tmov\tQWORD PTR[16+rsi],r15\n\tmov\tQWORD PTR[24+rsi],r9\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n__mulq_mont_sparse_256\tENDP\nPUBLIC\tfrom_mont_256\n\n\nALIGN\t32\nfrom_mont_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_from_mont_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD 
PTR[__blst_platform_cap],1\n\tjnz\tfrom_mont_256$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_from_mont_256::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulq_by_1_mont_256\n\n\n\n\n\n\tmov\tr10,r14\n\tmov\tr11,r15\n\tmov\tr12,r9\n\n\tsub\tr13,QWORD PTR[rbx]\n\tsbb\tr14,QWORD PTR[8+rbx]\n\tsbb\tr15,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\n\tcmovnc\trax,r13\n\tcmovnc\tr10,r14\n\tcmovnc\tr11,r15\n\tmov\tQWORD PTR[rdi],rax\n\tcmovnc\tr12,r9\n\tmov\tQWORD PTR[8+rdi],r10\n\tmov\tQWORD PTR[16+rdi],r11\n\tmov\tQWORD PTR[24+rdi],r12\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_from_mont_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_from_mont_256::\nfrom_mont_256\tENDP\n\nPUBLIC\tredc_mont_256\n\n\nALIGN\t32\nredc_mont_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_redc_mont_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tredc_mont_256$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_redc_mont_256::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulq_by_1_mont_256\n\n\tadd\tr13,QWORD PTR[32+rsi]\n\tadc\tr14,QWORD PTR[40+rsi]\n\tmov\trax,r13\n\tadc\tr15,QWORD PTR[48+rsi]\n\tmov\tr10,r14\n\tadc\tr9,QWORD PTR[56+rsi]\n\tsbb\trsi,rsi\n\n\n\n\n\tmov\tr11,r15\n\tsub\tr13,QWORD PTR[rbx]\n\tsbb\tr14,QWORD PTR[8+rbx]\n\tsbb\tr15,QWORD PTR[16+rbx]\n\tmov\tr12,r9\n\tsbb\tr9,QWORD 
PTR[24+rbx]\n\tsbb\trsi,0\n\n\tcmovnc\trax,r13\n\tcmovnc\tr10,r14\n\tcmovnc\tr11,r15\n\tmov\tQWORD PTR[rdi],rax\n\tcmovnc\tr12,r9\n\tmov\tQWORD PTR[8+rdi],r10\n\tmov\tQWORD PTR[16+rdi],r11\n\tmov\tQWORD PTR[24+rdi],r12\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_redc_mont_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_redc_mont_256::\nredc_mont_256\tENDP\n\nALIGN\t32\n__mulq_by_1_mont_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trax,QWORD PTR[rsi]\n\tmov\tr10,QWORD PTR[8+rsi]\n\tmov\tr11,QWORD PTR[16+rsi]\n\tmov\tr12,QWORD PTR[24+rsi]\n\n\tmov\tr13,rax\n\timul\trax,rcx\n\tmov\tr9,rax\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\tr13,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr10,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr10,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr14,r10\n\timul\tr10,rcx\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr11,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,r13\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\tr14,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr15,r11\n\timul\tr11,rcx\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\tr15,rdx\n\n\tmul\tQWORD 
PTR[8+rbx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr9,r12\n\timul\tr12,rcx\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr9,rax\n\tmov\trax,r12\n\tadc\tr9,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr13,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr13,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulq_by_1_mont_256\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_body_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mul_mont_sparse_256_prologue\n\n\tDD\timagerel $L$SEH_body_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_epilogue_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mul_mont_sparse_256_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_end_mul_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mul_mont_sparse_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_body_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqr_mont_sparse_256_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqr_mont_sparse_256_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_end_sqr_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqr_mont_sparse_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_from_mont_256\n\tDD\timagerel 
$L$SEH_body_from_mont_256\n\tDD\timagerel $L$SEH_info_from_mont_256_prologue\n\n\tDD\timagerel $L$SEH_body_from_mont_256\n\tDD\timagerel $L$SEH_epilogue_from_mont_256\n\tDD\timagerel $L$SEH_info_from_mont_256_body\n\n\tDD\timagerel $L$SEH_epilogue_from_mont_256\n\tDD\timagerel $L$SEH_end_from_mont_256\n\tDD\timagerel $L$SEH_info_from_mont_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_redc_mont_256\n\tDD\timagerel $L$SEH_body_redc_mont_256\n\tDD\timagerel $L$SEH_info_redc_mont_256_prologue\n\n\tDD\timagerel $L$SEH_body_redc_mont_256\n\tDD\timagerel $L$SEH_epilogue_redc_mont_256\n\tDD\timagerel $L$SEH_info_redc_mont_256_body\n\n\tDD\timagerel $L$SEH_epilogue_redc_mont_256\n\tDD\timagerel $L$SEH_end_redc_mont_256\n\tDD\timagerel $L$SEH_info_redc_mont_256_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_mul_mont_sparse_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_mont_sparse_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_mont_sparse_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_mont_sparse_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_mont_sparse_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_mont_sparse_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000
h\n\n$L$SEH_info_from_mont_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_from_mont_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_from_mont_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_redc_mont_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_redc_mont_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_redc_mont_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/mulq_mont_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\nEXTERN\tmul_mont_384x$1:NEAR\nEXTERN\tsqr_mont_384x$1:NEAR\nEXTERN\tmul_382x$1:NEAR\nEXTERN\tsqr_382x$1:NEAR\nEXTERN\tmul_384$1:NEAR\nEXTERN\tsqr_384$1:NEAR\nEXTERN\tredc_mont_384$1:NEAR\nEXTERN\tfrom_mont_384$1:NEAR\nEXTERN\tsgn0_pty_mont_384$1:NEAR\nEXTERN\tsgn0_pty_mont_384x$1:NEAR\nEXTERN\tmul_mont_384$1:NEAR\nEXTERN\tsqr_mont_384$1:NEAR\nEXTERN\tsqr_n_mul_mont_384$1:NEAR\nEXTERN\tsqr_n_mul_mont_383$1:NEAR\nEXTERN\tsqr_mont_382x$1:NEAR\n_DATA\tSEGMENT\nCOMM\t__blst_platform_cap:DWORD:1\n_DATA\tENDS\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\n\n\n\n\n\n\n\nALIGN\t32\n__subq_mod_384x384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\trax,QWORD PTR[64+rsi]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD PTR[72+rsi]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trsi,QWORD PTR[88+rsi]\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tmov\tQWORD PTR[rdi],r8\n\tsbb\tr14,QWORD PTR[48+rdx]\n\tmov\tr8,QWORD PTR[rcx]\n\tmov\tQWORD PTR[8+rdi],r9\n\tsbb\tr15,QWORD PTR[56+rdx]\n\tmov\tr9,QWORD PTR[8+rcx]\n\tmov\tQWORD PTR[16+rdi],r10\n\tsbb\trax,QWORD PTR[64+rdx]\n\tmov\tr10,QWORD PTR[16+rcx]\n\tmov\tQWORD PTR[24+rdi],r11\n\tsbb\trbx,QWORD PTR[72+rdx]\n\tmov\tr11,QWORD PTR[24+rcx]\n\tmov\tQWORD PTR[32+rdi],r12\n\tsbb\trbp,QWORD PTR[80+rdx]\n\tmov\tr12,QWORD PTR[32+rcx]\n\tmov\tQWORD PTR[40+rdi],r13\n\tsbb\trsi,QWORD PTR[88+rdx]\n\tmov\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr8,rdx\n\tand\tr9,rdx\n\tand\tr10,rdx\n\tand\tr11,rdx\n\tand\tr12,rdx\n\tand\tr13,rdx\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\trax,r10\n\tmov\tQWORD PTR[56+rdi],r15\n\tadc\trbx,r11\n\tmov\tQWORD 
PTR[64+rdi],rax\n\tadc\trbp,r12\n\tmov\tQWORD PTR[72+rdi],rbx\n\tadc\trsi,r13\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD PTR[88+rdi],rsi\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__subq_mod_384x384\tENDP\n\n\nALIGN\t32\n__addq_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr8,QWORD PTR[rdx]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\tr14,r8\n\tadc\tr11,QWORD PTR[24+rdx]\n\tmov\tr15,r9\n\tadc\tr12,QWORD PTR[32+rdx]\n\tmov\trax,r10\n\tadc\tr13,QWORD PTR[40+rdx]\n\tmov\trbx,r11\n\tsbb\trdx,rdx\n\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tmov\trbp,r12\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rcx]\n\tmov\trsi,r13\n\tsbb\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tmov\tQWORD PTR[rdi],r8\n\tcmovc\tr11,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tcmovc\tr12,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tcmovc\tr13,rsi\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__addq_mod_384\tENDP\n\n\nALIGN\t32\n__subq_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n__subq_mod_384_a_is_loaded::\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\tr15,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trax,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbx,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trbp,QWORD PTR[32+rcx]\n\tsbb\tr13,QWORD 
PTR[40+rdx]\n\tmov\trsi,QWORD PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr14,rdx\n\tand\tr15,rdx\n\tand\trax,rdx\n\tand\trbx,rdx\n\tand\trbp,rdx\n\tand\trsi,rdx\n\n\tadd\tr8,r14\n\tadc\tr9,r15\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,rax\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,rbx\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\tr12,rbp\n\tmov\tQWORD PTR[24+rdi],r11\n\tadc\tr13,rsi\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__subq_mod_384\tENDP\nPUBLIC\tmul_mont_384x\n\n\nALIGN\t32\nmul_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tmul_mont_384x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,328\n\n$L$SEH_body_mul_mont_384x::\n\n\n\tmov\trbx,rdx\n\tmov\tQWORD PTR[32+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rsi\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\tmov\tQWORD PTR[rsp],r8\n\n\n\n\n\tlea\trdi,QWORD PTR[40+rsp]\n\tcall\t__mulq_384\n\n\n\tlea\trbx,QWORD PTR[48+rbx]\n\tlea\trsi,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[((40+96))+rsp]\n\tcall\t__mulq_384\n\n\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trdx,QWORD PTR[((-48))+rsi]\n\tlea\trdi,QWORD PTR[((40+192+48))+rsp]\n\tcall\t__addq_mod_384\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\n\tcall\t__addq_mod_384\n\n\tlea\trbx,QWORD PTR[rdi]\n\tlea\trsi,QWORD PTR[48+rdi]\n\tcall\t__mulq_384\n\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trdx,QWORD PTR[40+rsp]\n\tmov\trcx,QWORD PTR[8+rsp]\n\tcall\t__subq_mod_384x384\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trdx,QWORD 
PTR[((-96))+rdi]\n\tcall\t__subq_mod_384x384\n\n\n\tlea\trsi,QWORD PTR[40+rsp]\n\tlea\trdx,QWORD PTR[((40+96))+rsp]\n\tlea\trdi,QWORD PTR[40+rsp]\n\tcall\t__subq_mod_384x384\n\n\tmov\trbx,rcx\n\n\n\tlea\trsi,QWORD PTR[40+rsp]\n\tmov\trcx,QWORD PTR[rsp]\n\tmov\trdi,QWORD PTR[32+rsp]\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\n\tlea\trsi,QWORD PTR[((40+192))+rsp]\n\tmov\trcx,QWORD PTR[rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tlea\tr8,QWORD PTR[328+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_mul_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_mont_384x::\nmul_mont_384x\tENDP\nPUBLIC\tsqr_mont_384x\n\n\nALIGN\t32\nsqr_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_mont_384x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_sqr_mont_384x::\n\n\n\tmov\tQWORD PTR[rsp],rcx\n\tmov\trcx,rdx\n\tmov\tQWORD PTR[8+rsp],rdi\n\tmov\tQWORD PTR[16+rsp],rsi\n\n\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[32+rsp]\n\tcall\t__addq_mod_384\n\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[((32+48))+rsp]\n\tcall\t__subq_mod_384\n\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\tlea\trbx,QWORD PTR[48+rsi]\n\n\tmov\trax,QWORD PTR[48+rsi]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD 
PTR[8+rsi]\n\tmov\tr12,QWORD PTR[16+rsi]\n\tmov\tr13,QWORD PTR[24+rsi]\n\n\tcall\t__mulq_mont_384\n\tadd\tr14,r14\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tmov\tr12,r14\n\tadc\tr9,r9\n\tmov\tr13,r15\n\tadc\tr10,r10\n\tmov\trax,r8\n\tadc\tr11,r11\n\tmov\trbx,r9\n\tsbb\trdx,rdx\n\n\tsub\tr14,QWORD PTR[rcx]\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tmov\trbp,r10\n\tsbb\tr8,QWORD PTR[16+rcx]\n\tsbb\tr9,QWORD PTR[24+rcx]\n\tsbb\tr10,QWORD PTR[32+rcx]\n\tmov\trsi,r11\n\tsbb\tr11,QWORD PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr14,r12\n\tcmovc\tr15,r13\n\tcmovc\tr8,rax\n\tmov\tQWORD PTR[48+rdi],r14\n\tcmovc\tr9,rbx\n\tmov\tQWORD PTR[56+rdi],r15\n\tcmovc\tr10,rbp\n\tmov\tQWORD PTR[64+rdi],r8\n\tcmovc\tr11,rsi\n\tmov\tQWORD PTR[72+rdi],r9\n\tmov\tQWORD PTR[80+rdi],r10\n\tmov\tQWORD PTR[88+rdi],r11\n\n\tlea\trsi,QWORD PTR[32+rsp]\n\tlea\trbx,QWORD PTR[((32+48))+rsp]\n\n\tmov\trax,QWORD PTR[((32+48))+rsp]\n\tmov\tr14,QWORD PTR[((32+0))+rsp]\n\tmov\tr15,QWORD PTR[((32+8))+rsp]\n\tmov\tr12,QWORD PTR[((32+16))+rsp]\n\tmov\tr13,QWORD PTR[((32+24))+rsp]\n\n\tcall\t__mulq_mont_384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqr_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_mont_384x::\nsqr_mont_384x\tENDP\n\nPUBLIC\tmul_382x\n\n\nALIGN\t32\nmul_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD 
PTR[__blst_platform_cap],1\n\tjnz\tmul_382x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_mul_382x::\n\n\n\tlea\trdi,QWORD PTR[96+rdi]\n\tmov\tQWORD PTR[rsp],rsi\n\tmov\tQWORD PTR[8+rsp],rdx\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr8,QWORD PTR[48+rsi]\n\tadc\tr9,QWORD PTR[56+rsi]\n\tadc\tr10,QWORD PTR[64+rsi]\n\tadc\tr11,QWORD PTR[72+rsi]\n\tadc\tr12,QWORD PTR[80+rsi]\n\tadc\tr13,QWORD PTR[88+rsi]\n\n\tmov\tQWORD PTR[((32+0))+rsp],r8\n\tmov\tQWORD PTR[((32+8))+rsp],r9\n\tmov\tQWORD PTR[((32+16))+rsp],r10\n\tmov\tQWORD PTR[((32+24))+rsp],r11\n\tmov\tQWORD PTR[((32+32))+rsp],r12\n\tmov\tQWORD PTR[((32+40))+rsp],r13\n\n\n\tmov\tr8,QWORD PTR[rdx]\n\tmov\tr9,QWORD PTR[8+rdx]\n\tmov\tr10,QWORD PTR[16+rdx]\n\tmov\tr11,QWORD PTR[24+rdx]\n\tmov\tr12,QWORD PTR[32+rdx]\n\tmov\tr13,QWORD PTR[40+rdx]\n\n\tadd\tr8,QWORD PTR[48+rdx]\n\tadc\tr9,QWORD PTR[56+rdx]\n\tadc\tr10,QWORD PTR[64+rdx]\n\tadc\tr11,QWORD PTR[72+rdx]\n\tadc\tr12,QWORD PTR[80+rdx]\n\tadc\tr13,QWORD PTR[88+rdx]\n\n\tmov\tQWORD PTR[((32+48))+rsp],r8\n\tmov\tQWORD PTR[((32+56))+rsp],r9\n\tmov\tQWORD PTR[((32+64))+rsp],r10\n\tmov\tQWORD PTR[((32+72))+rsp],r11\n\tmov\tQWORD PTR[((32+80))+rsp],r12\n\tmov\tQWORD PTR[((32+88))+rsp],r13\n\n\n\tlea\trsi,QWORD PTR[((32+0))+rsp]\n\tlea\trbx,QWORD PTR[((32+48))+rsp]\n\tcall\t__mulq_384\n\n\n\tmov\trsi,QWORD PTR[rsp]\n\tmov\trbx,QWORD PTR[8+rsp]\n\tlea\trdi,QWORD PTR[((-96))+rdi]\n\tcall\t__mulq_384\n\n\n\tlea\trsi,QWORD PTR[48+rsi]\n\tlea\trbx,QWORD PTR[48+rbx]\n\tlea\trdi,QWORD PTR[32+rsp]\n\tcall\t__mulq_384\n\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\tlea\trdx,QWORD PTR[32+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tmov\trdi,rsi\n\tcall\t__subq_mod_384x384\n\n\n\tlea\trsi,QWORD 
PTR[rdi]\n\tlea\trdx,QWORD PTR[((-96))+rdi]\n\tcall\t__subq_mod_384x384\n\n\n\tlea\trsi,QWORD PTR[((-96))+rdi]\n\tlea\trdx,QWORD PTR[32+rsp]\n\tlea\trdi,QWORD PTR[((-96))+rdi]\n\tcall\t__subq_mod_384x384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_mul_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_382x::\nmul_382x\tENDP\nPUBLIC\tsqr_382x\n\n\nALIGN\t32\nsqr_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_382x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trsi\n\n$L$SEH_body_sqr_382x::\n\n\n\tmov\trcx,rdx\n\n\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\trbx,QWORD PTR[24+rsi]\n\tmov\trbp,QWORD PTR[32+rsi]\n\tmov\trdx,QWORD PTR[40+rsi]\n\n\tmov\tr8,r14\n\tadd\tr14,QWORD PTR[48+rsi]\n\tmov\tr9,r15\n\tadc\tr15,QWORD PTR[56+rsi]\n\tmov\tr10,rax\n\tadc\trax,QWORD PTR[64+rsi]\n\tmov\tr11,rbx\n\tadc\trbx,QWORD PTR[72+rsi]\n\tmov\tr12,rbp\n\tadc\trbp,QWORD PTR[80+rsi]\n\tmov\tr13,rdx\n\tadc\trdx,QWORD PTR[88+rsi]\n\n\tmov\tQWORD PTR[rdi],r14\n\tmov\tQWORD PTR[8+rdi],r15\n\tmov\tQWORD PTR[16+rdi],rax\n\tmov\tQWORD PTR[24+rdi],rbx\n\tmov\tQWORD PTR[32+rdi],rbp\n\tmov\tQWORD PTR[40+rdi],rdx\n\n\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__subq_mod_384_a_is_loaded\n\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trbx,QWORD 
PTR[((-48))+rdi]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\n\tcall\t__mulq_384\n\n\n\tmov\trsi,QWORD PTR[rsp]\n\tlea\trbx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[96+rdi]\n\tcall\t__mulq_384\n\n\tmov\tr8,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\tmov\tr10,QWORD PTR[16+rdi]\n\tmov\tr11,QWORD PTR[24+rdi]\n\tmov\tr12,QWORD PTR[32+rdi]\n\tmov\tr13,QWORD PTR[40+rdi]\n\tmov\tr14,QWORD PTR[48+rdi]\n\tmov\tr15,QWORD PTR[56+rdi]\n\tmov\trax,QWORD PTR[64+rdi]\n\tmov\trbx,QWORD PTR[72+rdi]\n\tmov\trbp,QWORD PTR[80+rdi]\n\tadd\tr8,r8\n\tmov\trdx,QWORD PTR[88+rdi]\n\tadc\tr9,r9\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,r10\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,r11\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\tr12,r12\n\tmov\tQWORD PTR[24+rdi],r11\n\tadc\tr13,r13\n\tmov\tQWORD PTR[32+rdi],r12\n\tadc\tr14,r14\n\tmov\tQWORD PTR[40+rdi],r13\n\tadc\tr15,r15\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\trax,rax\n\tmov\tQWORD PTR[56+rdi],r15\n\tadc\trbx,rbx\n\tmov\tQWORD PTR[64+rdi],rax\n\tadc\trbp,rbp\n\tmov\tQWORD PTR[72+rdi],rbx\n\tadc\trdx,rdx\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD PTR[88+rdi],rdx\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sqr_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_382x::\nsqr_382x\tENDP\nPUBLIC\tmul_384\n\n\nALIGN\t32\nmul_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nifdef __BLST_PORTABLE__\n\ttest\tDWORD 
PTR[__blst_platform_cap],1\n\tjnz\tmul_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n$L$SEH_body_mul_384::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulq_384\n\n\tmov\tr12,QWORD PTR[rsp]\n\n\tmov\trbx,QWORD PTR[8+rsp]\n\n\tmov\trbp,QWORD PTR[16+rsp]\n\n\tlea\trsp,QWORD PTR[24+rsp]\n\n$L$SEH_epilogue_mul_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_384::\nmul_384\tENDP\n\n\nALIGN\t32\n__mulq_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trax,QWORD PTR[rbx]\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tmov\tQWORD PTR[rdi],rax\n\tmov\trax,rbp\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr11,rax\n\tmov\trax,QWORD PTR[8+rbx]\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[8+rdi],rcx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[16+rbx]\n\tadc\trdx,0\n\tadd\tr11,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\trbp,rax\n\tmul\tQWORD 
PTR[rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[16+rdi],rcx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[24+rbx]\n\tadc\trdx,0\n\tadd\tr11,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[24+rdi],rcx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[32+rbx]\n\tadc\trdx,0\n\tadd\tr11,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[32+rdi],rcx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD 
PTR[32+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr12,rax\n\tmov\trax,QWORD PTR[40+rbx]\n\tadc\trdx,0\n\tadd\tr11,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\trcx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[40+rdi],rcx\n\tmov\trcx,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr12,rax\n\tmov\trax,rax\n\tadc\trdx,0\n\tadd\tr11,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\tmov\tQWORD PTR[48+rdi],rcx\n\tmov\tQWORD PTR[56+rdi],r8\n\tmov\tQWORD PTR[64+rdi],r9\n\tmov\tQWORD PTR[72+rdi],r10\n\tmov\tQWORD PTR[80+rdi],r11\n\tmov\tQWORD PTR[88+rdi],r12\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulq_384\tENDP\nPUBLIC\tsqr_384\n\n\nALIGN\t32\nsqr_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sqr_384::\n\n\n\tcall\t__sqrq_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD 
PTR[56+rsp]\n\n$L$SEH_epilogue_sqr_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_384::\nsqr_384\tENDP\n\n\nALIGN\t32\n__sqrq_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trax,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trcx,QWORD PTR[16+rsi]\n\tmov\trbx,QWORD PTR[24+rsi]\n\n\n\tmov\tr14,rax\n\tmul\tr15\n\tmov\tr9,rax\n\tmov\trax,r14\n\tmov\trbp,QWORD PTR[32+rsi]\n\tmov\tr10,rdx\n\n\tmul\trcx\n\tadd\tr10,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\trsi,QWORD PTR[40+rsi]\n\tmov\tr11,rdx\n\n\tmul\trbx\n\tadd\tr11,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\trbp\n\tadd\tr12,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\trsi\n\tadd\tr13,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\trax\n\txor\tr8,r8\n\tmov\tQWORD PTR[rdi],rax\n\tmov\trax,r15\n\tadd\tr9,r9\n\tadc\tr8,0\n\tadd\tr9,rdx\n\tadc\tr8,0\n\tmov\tQWORD PTR[8+rdi],r9\n\n\tmul\trcx\n\tadd\tr11,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\trbx\n\tadd\tr12,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tadd\tr12,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\trbp\n\tadd\tr13,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tadd\tr13,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\trsi\n\tadd\tr14,rax\n\tmov\trax,r15\n\tadc\trdx,0\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\trax\n\txor\tr9,r9\n\tadd\tr8,rax\n\tmov\trax,rcx\n\tadd\tr10,r10\n\tadc\tr11,r11\n\tadc\tr9,0\n\tadd\tr10,r8\n\tadc\tr11,rdx\n\tadc\tr9,0\n\tmov\tQWORD PTR[16+rdi],r10\n\n\tmul\trbx\n\tadd\tr13,rax\n\tmov\trax,rcx\n\tadc\trdx,0\n\tmov\tQWORD 
PTR[24+rdi],r11\n\tmov\tr8,rdx\n\n\tmul\trbp\n\tadd\tr14,rax\n\tmov\trax,rcx\n\tadc\trdx,0\n\tadd\tr14,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\trsi\n\tadd\tr15,rax\n\tmov\trax,rcx\n\tadc\trdx,0\n\tadd\tr15,r8\n\tadc\trdx,0\n\tmov\trcx,rdx\n\n\tmul\trax\n\txor\tr11,r11\n\tadd\tr9,rax\n\tmov\trax,rbx\n\tadd\tr12,r12\n\tadc\tr13,r13\n\tadc\tr11,0\n\tadd\tr12,r9\n\tadc\tr13,rdx\n\tadc\tr11,0\n\tmov\tQWORD PTR[32+rdi],r12\n\n\n\tmul\trbp\n\tadd\tr15,rax\n\tmov\trax,rbx\n\tadc\trdx,0\n\tmov\tQWORD PTR[40+rdi],r13\n\tmov\tr8,rdx\n\n\tmul\trsi\n\tadd\trcx,rax\n\tmov\trax,rbx\n\tadc\trdx,0\n\tadd\trcx,r8\n\tadc\trdx,0\n\tmov\trbx,rdx\n\n\tmul\trax\n\txor\tr12,r12\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadd\tr14,r14\n\tadc\tr15,r15\n\tadc\tr12,0\n\tadd\tr14,r11\n\tadc\tr15,rdx\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\tr12,0\n\tmov\tQWORD PTR[56+rdi],r15\n\n\n\tmul\trsi\n\tadd\trbx,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\trax\n\txor\tr13,r13\n\tadd\tr12,rax\n\tmov\trax,rsi\n\tadd\trcx,rcx\n\tadc\trbx,rbx\n\tadc\tr13,0\n\tadd\trcx,r12\n\tadc\trbx,rdx\n\tmov\tQWORD PTR[64+rdi],rcx\n\tadc\tr13,0\n\tmov\tQWORD PTR[72+rdi],rbx\n\n\n\tmul\trax\n\tadd\trax,r13\n\tadd\trbp,rbp\n\tadc\trdx,0\n\tadd\trax,rbp\n\tadc\trdx,0\n\tmov\tQWORD PTR[80+rdi],rax\n\tmov\tQWORD PTR[88+rdi],rdx\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__sqrq_384\tENDP\n\nPUBLIC\tsqr_mont_384\n\n\nALIGN\t32\nsqr_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8*15\n\n$L$SEH_body_sqr_mont_384::\n\n\n\tmov\tQWORD PTR[96+rsp],rcx\n\tmov\tQWORD 
PTR[104+rsp],rdx\n\tmov\tQWORD PTR[112+rsp],rdi\n\n\tmov\trdi,rsp\n\tcall\t__sqrq_384\n\n\tlea\trsi,QWORD PTR[rsp]\n\tmov\trcx,QWORD PTR[96+rsp]\n\tmov\trbx,QWORD PTR[104+rsp]\n\tmov\trdi,QWORD PTR[112+rsp]\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tlea\tr8,QWORD PTR[120+rsp]\n\tmov\tr15,QWORD PTR[120+rsp]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqr_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_mont_384::\nsqr_mont_384\tENDP\n\n\n\nPUBLIC\tredc_mont_384\n\n\nALIGN\t32\nredc_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_redc_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tredc_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_redc_mont_384::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_redc_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_redc_mont_384::\nredc_mont_384\tENDP\n\n\n\n\nPUBLIC\tfrom_mont_384\n\n\nALIGN\t32\nfrom_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 
prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_from_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tfrom_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_from_mont_384::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulq_by_1_mont_384\n\n\n\n\n\n\tmov\trcx,r15\n\tmov\trdx,r8\n\tmov\trbp,r9\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tmov\tr13,r10\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tmov\trsi,r11\n\tsbb\tr11,QWORD PTR[40+rbx]\n\n\tcmovc\tr14,rax\n\tcmovc\tr15,rcx\n\tcmovc\tr8,rdx\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr9,rbp\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovc\tr10,r13\n\tmov\tQWORD PTR[16+rdi],r8\n\tcmovc\tr11,rsi\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_from_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_from_mont_384::\nfrom_mont_384\tENDP\n\nALIGN\t32\n__mulq_by_1_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trax,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,rax\n\timul\trax,rcx\n\tmov\tr8,rax\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr14,rax\n\tmov\trax,r8\n\tadc\tr14,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr9,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr9,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD 
PTR[16+rbx]\n\tadd\tr10,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr10,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr11,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tmov\tr15,r9\n\timul\tr9,rcx\n\tadd\tr11,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr12,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr12,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[40+rbx]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr15,rax\n\tmov\trax,r9\n\tadc\tr15,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr10,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr10,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr11,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr12,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tmov\tr8,r10\n\timul\tr10,rcx\n\tadd\tr12,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[40+rbx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr8,rax\n\tmov\trax,r10\n\tadc\tr8,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr13,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr9,r11\n\timul\tr11,rcx\n\tadd\tr13,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[40+rbx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr15,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr9,rax\n\tmov\trax,r11\n\tadc\tr9,rdx\n\n\tmul\tQWORD 
PTR[8+rbx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr14,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr10,r12\n\timul\tr12,rcx\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr15,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[40+rbx]\n\tadd\tr8,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr8,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr10,rax\n\tmov\trax,r12\n\tadc\tr10,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr13,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr13,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr15,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr11,r13\n\timul\tr13,rcx\n\tadd\tr15,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr8,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr8,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[40+rbx]\n\tadd\tr9,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr9,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr11,rax\n\tmov\trax,r13\n\tadc\tr11,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr14,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr14,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr8,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr8,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[32+rbx]\n\tadd\tr9,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr9,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD 
PTR[40+rbx]\n\tadd\tr10,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulq_by_1_mont_384\tENDP\n\n\nALIGN\t32\n__redq_tail_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tadd\tr14,QWORD PTR[48+rsi]\n\tmov\trax,r14\n\tadc\tr15,QWORD PTR[56+rsi]\n\tadc\tr8,QWORD PTR[64+rsi]\n\tadc\tr9,QWORD PTR[72+rsi]\n\tmov\trcx,r15\n\tadc\tr10,QWORD PTR[80+rsi]\n\tadc\tr11,QWORD PTR[88+rsi]\n\tsbb\tr12,r12\n\n\n\n\n\tmov\trdx,r8\n\tmov\trbp,r9\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tmov\tr13,r10\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tmov\trsi,r11\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\tr12,0\n\n\tcmovc\tr14,rax\n\tcmovc\tr15,rcx\n\tcmovc\tr8,rdx\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr9,rbp\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovc\tr10,r13\n\tmov\tQWORD PTR[16+rdi],r8\n\tcmovc\tr11,rsi\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__redq_tail_mont_384\tENDP\n\nPUBLIC\tsgn0_pty_mont_384\n\n\nALIGN\t32\nsgn0_pty_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0_pty_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsgn0_pty_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sgn0_pty_mont_384::\n\n\n\tmov\trbx,rsi\n\tlea\trsi,QWORD PTR[rdi]\n\tmov\trcx,rdx\n\tcall\t__mulq_by_1_mont_384\n\n\txor\trax,rax\n\tmov\tr13,r14\n\tadd\tr14,r14\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trax,0\n\n\tsub\tr14,QWORD 
PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trax,0\n\n\tnot\trax\n\tand\tr13,1\n\tand\trax,2\n\tor\trax,r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sgn0_pty_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0_pty_mont_384::\nsgn0_pty_mont_384\tENDP\n\nPUBLIC\tsgn0_pty_mont_384x\n\n\nALIGN\t32\nsgn0_pty_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0_pty_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsgn0_pty_mont_384x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sgn0_pty_mont_384x::\n\n\n\tmov\trbx,rsi\n\tlea\trsi,QWORD PTR[48+rdi]\n\tmov\trcx,rdx\n\tcall\t__mulq_by_1_mont_384\n\n\tmov\tr12,r14\n\tor\tr14,r15\n\tor\tr14,r8\n\tor\tr14,r9\n\tor\tr14,r10\n\tor\tr14,r11\n\n\tlea\trsi,QWORD PTR[rdi]\n\txor\trdi,rdi\n\tmov\tr13,r12\n\tadd\tr12,r12\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trdi,0\n\n\tsub\tr12,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trdi,0\n\n\tmov\tQWORD 
PTR[rsp],r14\n\tnot\trdi\n\tand\tr13,1\n\tand\trdi,2\n\tor\trdi,r13\n\n\tcall\t__mulq_by_1_mont_384\n\n\tmov\tr12,r14\n\tor\tr14,r15\n\tor\tr14,r8\n\tor\tr14,r9\n\tor\tr14,r10\n\tor\tr14,r11\n\n\txor\trax,rax\n\tmov\tr13,r12\n\tadd\tr12,r12\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trax,0\n\n\tsub\tr12,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trax,0\n\n\tmov\tr12,QWORD PTR[rsp]\n\n\tnot\trax\n\n\ttest\tr14,r14\n\tcmovz\tr13,rdi\n\n\ttest\tr12,r12\n\tcmovnz\trax,rdi\n\n\tand\tr13,1\n\tand\trax,2\n\tor\trax,r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sgn0_pty_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0_pty_mont_384x::\nsgn0_pty_mont_384x\tENDP\nPUBLIC\tmul_mont_384\n\n\nALIGN\t32\nmul_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mul_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tmul_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8*3\n\n$L$SEH_body_mul_mont_384::\n\n\n\tmov\trax,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\tr12,QWORD PTR[16+rsi]\n\tmov\tr13,QWORD PTR[24+rsi]\n\tmov\trbx,rdx\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tQWORD PTR[8+rsp],rdi\n\n\tcall\t__mulq_mont_384\n\n\tmov\tr15,QWORD 
PTR[24+rsp]\n\n\tmov\tr14,QWORD PTR[32+rsp]\n\n\tmov\tr13,QWORD PTR[40+rsp]\n\n\tmov\tr12,QWORD PTR[48+rsp]\n\n\tmov\trbx,QWORD PTR[56+rsp]\n\n\tmov\trbp,QWORD PTR[64+rsp]\n\n\tlea\trsp,QWORD PTR[72+rsp]\n\n$L$SEH_epilogue_mul_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mul_mont_384::\nmul_mont_384\tENDP\n\nALIGN\t32\n__mulq_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trdi,rax\n\tmul\tr14\n\tmov\tr8,rax\n\tmov\trax,rdi\n\tmov\tr9,rdx\n\n\tmul\tr15\n\tadd\tr9,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tr12\n\tadd\tr10,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmov\trbp,r8\n\timul\tr8,QWORD PTR[8+rsp]\n\n\tmul\tr13\n\tadd\tr11,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr12,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr13,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\txor\tr15,r15\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r8\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr9,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr9,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr10,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr10,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr11,rbp\n\tadc\trdx,0\n\tadd\tr11,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr12,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr13,rax\n\tmov\trax,QWORD PTR[8+rbx]\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\tr14,rdx\n\tadc\tr15,0\n\n\tmov\trdi,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr9,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr10,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr10,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD 
PTR[16+rsi]\n\tadd\tr11,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr11,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmov\trbp,r9\n\timul\tr9,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr12,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr12,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr13,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr13,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr14,r8\n\tadc\trdx,0\n\txor\tr8,r8\n\tadd\tr14,rax\n\tmov\trax,r9\n\tadc\tr15,rdx\n\tadc\tr8,0\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r9\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr10,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr10,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr11,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tadd\tr12,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr14,rax\n\tmov\trax,QWORD PTR[16+rbx]\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\tr15,rdx\n\tadc\tr8,0\n\n\tmov\trdi,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr10,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr11,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr11,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr12,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr12,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmov\trbp,r10\n\timul\tr10,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr13,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr13,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr14,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr15,r9\n\tadc\trdx,0\n\txor\tr9,r9\n\tadd\tr15,rax\n\tmov\trax,r10\n\tadc\tr8,rdx\n\tadc\tr9,0\n\n\tmul\tQWORD 
PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r10\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tadd\tr13,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr15,rax\n\tmov\trax,QWORD PTR[24+rbx]\n\tadc\trdx,0\n\tadd\tr15,rbp\n\tadc\tr8,rdx\n\tadc\tr9,0\n\n\tmov\trdi,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr11,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr12,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr12,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr13,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr13,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmov\trbp,r11\n\timul\tr11,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr14,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr14,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr15,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr15,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr8,r10\n\tadc\trdx,0\n\txor\tr10,r10\n\tadd\tr8,rax\n\tmov\trax,r11\n\tadc\tr9,rdx\n\tadc\tr10,0\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r11\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tadd\tr14,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr15,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD 
PTR[40+rcx]\n\tadd\tr8,rax\n\tmov\trax,QWORD PTR[32+rbx]\n\tadc\trdx,0\n\tadd\tr8,rbp\n\tadc\tr9,rdx\n\tadc\tr10,0\n\n\tmov\trdi,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr12,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr13,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr13,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr14,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr14,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmov\trbp,r12\n\timul\tr12,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr15,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr15,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr8,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr8,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr9,r11\n\tadc\trdx,0\n\txor\tr11,r11\n\tadd\tr9,rax\n\tmov\trax,r12\n\tadc\tr10,rdx\n\tadc\tr11,0\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r12\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr13,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr13,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr15,rbp\n\tadc\trdx,0\n\tadd\tr15,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr8,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr8,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr9,rax\n\tmov\trax,QWORD PTR[40+rbx]\n\tadc\trdx,0\n\tadd\tr9,rbp\n\tadc\tr10,rdx\n\tadc\tr11,0\n\n\tmov\trdi,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr13,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr14,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr14,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr15,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr15,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmov\trbp,r13\n\timul\tr13,QWORD PTR[8+rsp]\n\n\tmul\tQWORD 
PTR[24+rsi]\n\tadd\tr8,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr8,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr9,rax\n\tmov\trax,rdi\n\tadc\trdx,0\n\tadd\tr9,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr10,r12\n\tadc\trdx,0\n\txor\tr12,r12\n\tadd\tr10,rax\n\tmov\trax,r13\n\tadc\tr11,rdx\n\tadc\tr12,0\n\n\tmul\tQWORD PTR[rcx]\n\tadd\trbp,rax\n\tmov\trax,r13\n\tadc\trbp,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr14,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr14,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr8,rbp\n\tadc\trdx,0\n\tadd\tr8,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr9,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr9,rbp\n\tadc\trdx,0\n\tmov\trbp,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr10,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tadd\tr10,rbp\n\tadc\tr11,rdx\n\tadc\tr12,0\n\n\n\n\n\tmov\trdi,QWORD PTR[16+rsp]\n\tsub\tr14,QWORD PTR[rcx]\n\tmov\trdx,r15\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tmov\trbx,r8\n\tsbb\tr8,QWORD PTR[16+rcx]\n\tmov\trsi,r9\n\tsbb\tr9,QWORD PTR[24+rcx]\n\tmov\trbp,r10\n\tsbb\tr10,QWORD PTR[32+rcx]\n\tmov\tr13,r11\n\tsbb\tr11,QWORD PTR[40+rcx]\n\tsbb\tr12,0\n\n\tcmovc\tr14,rax\n\tcmovc\tr15,rdx\n\tcmovc\tr8,rbx\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr9,rsi\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovc\tr10,rbp\n\tmov\tQWORD PTR[16+rdi],r8\n\tcmovc\tr11,r13\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulq_mont_384\tENDP\nPUBLIC\tsqr_n_mul_mont_384\n\n\nALIGN\t32\nsqr_n_mul_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_n_mul_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\n\tmov\tr9,QWORD PTR[48+rsp]\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_n_mul_mont_384$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8*17\n\n$L$SEH_body_sqr_n_mul_mont_384::\n\n\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tQWORD PTR[8+rsp],rdi\n\tmov\tQWORD PTR[16+rsp],rcx\n\tlea\trdi,QWORD PTR[32+rsp]\n\tmov\tQWORD PTR[24+rsp],r9\n\tmovq\txmm2,QWORD PTR[r9]\n\n$L$oop_sqr_384::\n\tmovd\txmm1,edx\n\n\tcall\t__sqrq_384\n\n\tlea\trsi,QWORD PTR[rdi]\n\tmov\trcx,QWORD PTR[rsp]\n\tmov\trbx,QWORD PTR[16+rsp]\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovd\tedx,xmm1\n\tlea\trsi,QWORD PTR[rdi]\n\tdec\tedx\n\tjnz\t$L$oop_sqr_384\n\nDB\t102,72,15,126,208\n\tmov\trcx,rbx\n\tmov\trbx,QWORD PTR[24+rsp]\n\n\n\n\n\n\n\tmov\tr12,r8\n\tmov\tr13,r9\n\n\tcall\t__mulq_mont_384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[136+rsp]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqr_n_mul_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_n_mul_mont_384::\nsqr_n_mul_mont_384\tENDP\n\nPUBLIC\tsqr_n_mul_mont_383\n\n\nALIGN\t32\nsqr_n_mul_mont_383\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_n_mul_mont_383::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\n\tmov\tr9,QWORD PTR[48+rsp]\nifdef __BLST_PORTABLE__\n\ttest\tDWORD 
PTR[__blst_platform_cap],1\n\tjnz\tsqr_n_mul_mont_383$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8*17\n\n$L$SEH_body_sqr_n_mul_mont_383::\n\n\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tQWORD PTR[8+rsp],rdi\n\tmov\tQWORD PTR[16+rsp],rcx\n\tlea\trdi,QWORD PTR[32+rsp]\n\tmov\tQWORD PTR[24+rsp],r9\n\tmovq\txmm2,QWORD PTR[r9]\n\n$L$oop_sqr_383::\n\tmovd\txmm1,edx\n\n\tcall\t__sqrq_384\n\n\tlea\trsi,QWORD PTR[rdi]\n\tmov\trcx,QWORD PTR[rsp]\n\tmov\trbx,QWORD PTR[16+rsp]\n\tcall\t__mulq_by_1_mont_384\n\n\tmovd\tedx,xmm1\n\tadd\tr14,QWORD PTR[48+rsi]\n\tadc\tr15,QWORD PTR[56+rsi]\n\tadc\tr8,QWORD PTR[64+rsi]\n\tadc\tr9,QWORD PTR[72+rsi]\n\tadc\tr10,QWORD PTR[80+rsi]\n\tadc\tr11,QWORD PTR[88+rsi]\n\tlea\trsi,QWORD PTR[rdi]\n\n\tmov\tQWORD PTR[rdi],r14\n\tmov\tQWORD PTR[8+rdi],r15\n\tmov\tQWORD PTR[16+rdi],r8\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\tdec\tedx\n\tjnz\t$L$oop_sqr_383\n\nDB\t102,72,15,126,208\n\tmov\trcx,rbx\n\tmov\trbx,QWORD PTR[24+rsp]\n\n\n\n\n\n\n\tmov\tr12,r8\n\tmov\tr13,r9\n\n\tcall\t__mulq_mont_384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[136+rsp]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqr_n_mul_mont_383::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_n_mul_mont_383::\nsqr_n_mul_mont_383\tENDP\n\nALIGN\t32\n__mulq_mont_383_nonred\tPROC 
PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trbp,rax\n\tmul\tr14\n\tmov\tr8,rax\n\tmov\trax,rbp\n\tmov\tr9,rdx\n\n\tmul\tr15\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tr12\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmov\tr15,r8\n\timul\tr8,QWORD PTR[8+rsp]\n\n\tmul\tr13\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr12,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr13,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr13,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\tr15,rax\n\tmov\trax,r8\n\tadc\tr15,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr9,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr9,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr10,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr10,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr11,r15\n\tadc\trdx,0\n\tadd\tr11,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr12,rax\n\tmov\trax,r8\n\tadc\trdx,0\n\tadd\tr12,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr13,rax\n\tmov\trax,QWORD PTR[8+rbx]\n\tadc\trdx,0\n\tadd\tr13,r15\n\tadc\tr14,rdx\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr10,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr11,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmov\tr8,r9\n\timul\tr9,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr12,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr12,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr13,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr13,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr14,r15\n\tadc\trdx,0\n\tadd\tr14,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD 
PTR[rcx]\n\tadd\tr8,rax\n\tmov\trax,r9\n\tadc\tr8,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr10,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr10,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr11,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr11,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr12,r8\n\tadc\trdx,0\n\tadd\tr12,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr13,rax\n\tmov\trax,r9\n\tadc\trdx,0\n\tadd\tr13,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr14,rax\n\tmov\trax,QWORD PTR[16+rbx]\n\tadc\trdx,0\n\tadd\tr14,r8\n\tadc\tr15,rdx\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr10,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr11,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr12,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr12,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmov\tr9,r10\n\timul\tr10,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr13,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr13,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr14,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr14,r8\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr15,r8\n\tadc\trdx,0\n\tadd\tr15,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr8,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\tr9,rax\n\tmov\trax,r10\n\tadc\tr9,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr13,r9\n\tadc\trdx,0\n\tadd\tr13,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr15,rax\n\tmov\trax,QWORD 
PTR[24+rbx]\n\tadc\trdx,0\n\tadd\tr15,r9\n\tadc\tr8,rdx\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr11,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr12,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr12,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr13,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr13,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmov\tr10,r11\n\timul\tr11,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr14,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr14,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr15,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr15,r9\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr8,r9\n\tadc\trdx,0\n\tadd\tr8,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr9,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\tr10,rax\n\tmov\trax,r11\n\tadc\tr10,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr14,r10\n\tadc\trdx,0\n\tadd\tr14,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr15,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr8,rax\n\tmov\trax,QWORD PTR[32+rbx]\n\tadc\trdx,0\n\tadd\tr8,r10\n\tadc\tr9,rdx\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr12,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr13,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr13,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr14,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr14,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmov\tr11,r12\n\timul\tr12,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr15,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr15,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD 
PTR[32+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr9,r10\n\tadc\trdx,0\n\tadd\tr9,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\tr11,rax\n\tmov\trax,r12\n\tadc\tr11,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr13,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr13,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[24+rcx]\n\tadd\tr15,r11\n\tadc\trdx,0\n\tadd\tr15,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr8,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr8,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr9,rax\n\tmov\trax,QWORD PTR[40+rbx]\n\tadc\trdx,0\n\tadd\tr9,r11\n\tadc\tr10,rdx\n\n\tmov\trbp,rax\n\tmul\tQWORD PTR[rsi]\n\tadd\tr13,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[8+rsi]\n\tadd\tr14,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr14,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[16+rsi]\n\tadd\tr15,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr15,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmov\tr12,r13\n\timul\tr13,QWORD PTR[8+rsp]\n\n\tmul\tQWORD PTR[24+rsi]\n\tadd\tr8,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr8,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[32+rsi]\n\tadd\tr9,rax\n\tmov\trax,rbp\n\tadc\trdx,0\n\tadd\tr9,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[40+rsi]\n\tadd\tr10,r11\n\tadc\trdx,0\n\tadd\tr10,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[rcx]\n\tadd\tr12,rax\n\tmov\trax,r13\n\tadc\tr12,rdx\n\n\tmul\tQWORD PTR[8+rcx]\n\tadd\tr14,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr14,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[16+rcx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD 
PTR[24+rcx]\n\tadd\tr8,r12\n\tadc\trdx,0\n\tadd\tr8,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[32+rcx]\n\tadd\tr9,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr9,r12\n\tadc\trdx,0\n\tmov\tr12,rdx\n\n\tmul\tQWORD PTR[40+rcx]\n\tadd\tr10,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tadd\tr10,r12\n\tadc\tr11,rdx\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulq_mont_383_nonred\tENDP\nPUBLIC\tsqr_mont_382x\n\n\nALIGN\t32\nsqr_mont_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqr_mont_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nifdef __BLST_PORTABLE__\n\ttest\tDWORD PTR[__blst_platform_cap],1\n\tjnz\tsqr_mont_382x$1\nendif\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_sqr_mont_382x::\n\n\n\tmov\tQWORD PTR[rsp],rcx\n\tmov\trcx,rdx\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tQWORD PTR[24+rsp],rdi\n\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,r8\n\tadd\tr8,QWORD PTR[48+rsi]\n\tmov\tr15,r9\n\tadc\tr9,QWORD PTR[56+rsi]\n\tmov\trax,r10\n\tadc\tr10,QWORD PTR[64+rsi]\n\tmov\trdx,r11\n\tadc\tr11,QWORD PTR[72+rsi]\n\tmov\trbx,r12\n\tadc\tr12,QWORD PTR[80+rsi]\n\tmov\trbp,r13\n\tadc\tr13,QWORD PTR[88+rsi]\n\n\tsub\tr14,QWORD PTR[48+rsi]\n\tsbb\tr15,QWORD PTR[56+rsi]\n\tsbb\trax,QWORD PTR[64+rsi]\n\tsbb\trdx,QWORD PTR[72+rsi]\n\tsbb\trbx,QWORD PTR[80+rsi]\n\tsbb\trbp,QWORD PTR[88+rsi]\n\tsbb\trdi,rdi\n\n\tmov\tQWORD PTR[((32+0))+rsp],r8\n\tmov\tQWORD PTR[((32+8))+rsp],r9\n\tmov\tQWORD PTR[((32+16))+rsp],r10\n\tmov\tQWORD PTR[((32+24))+rsp],r11\n\tmov\tQWORD PTR[((32+32))+rsp],r12\n\tmov\tQWORD PTR[((32+40))+rsp],r13\n\n\tmov\tQWORD 
PTR[((32+48))+rsp],r14\n\tmov\tQWORD PTR[((32+56))+rsp],r15\n\tmov\tQWORD PTR[((32+64))+rsp],rax\n\tmov\tQWORD PTR[((32+72))+rsp],rdx\n\tmov\tQWORD PTR[((32+80))+rsp],rbx\n\tmov\tQWORD PTR[((32+88))+rsp],rbp\n\tmov\tQWORD PTR[((32+96))+rsp],rdi\n\n\n\n\tlea\trbx,QWORD PTR[48+rsi]\n\n\tmov\trax,QWORD PTR[48+rsi]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\tr12,QWORD PTR[16+rsi]\n\tmov\tr13,QWORD PTR[24+rsi]\n\n\tmov\trdi,QWORD PTR[24+rsp]\n\tcall\t__mulq_mont_383_nonred\n\tadd\tr14,r14\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\n\tmov\tQWORD PTR[48+rdi],r14\n\tmov\tQWORD PTR[56+rdi],r15\n\tmov\tQWORD PTR[64+rdi],r8\n\tmov\tQWORD PTR[72+rdi],r9\n\tmov\tQWORD PTR[80+rdi],r10\n\tmov\tQWORD PTR[88+rdi],r11\n\n\tlea\trsi,QWORD PTR[32+rsp]\n\tlea\trbx,QWORD PTR[((32+48))+rsp]\n\n\tmov\trax,QWORD PTR[((32+48))+rsp]\n\tmov\tr14,QWORD PTR[((32+0))+rsp]\n\tmov\tr15,QWORD PTR[((32+8))+rsp]\n\tmov\tr12,QWORD PTR[((32+16))+rsp]\n\tmov\tr13,QWORD PTR[((32+24))+rsp]\n\n\tcall\t__mulq_mont_383_nonred\n\tmov\trsi,QWORD PTR[((32+96))+rsp]\n\tmov\tr12,QWORD PTR[((32+0))+rsp]\n\tmov\tr13,QWORD PTR[((32+8))+rsp]\n\tand\tr12,rsi\n\tmov\trax,QWORD PTR[((32+16))+rsp]\n\tand\tr13,rsi\n\tmov\trbx,QWORD PTR[((32+24))+rsp]\n\tand\trax,rsi\n\tmov\trbp,QWORD PTR[((32+32))+rsp]\n\tand\trbx,rsi\n\tand\trbp,rsi\n\tand\trsi,QWORD PTR[((32+40))+rsp]\n\n\tsub\tr14,r12\n\tmov\tr12,QWORD PTR[rcx]\n\tsbb\tr15,r13\n\tmov\tr13,QWORD PTR[8+rcx]\n\tsbb\tr8,rax\n\tmov\trax,QWORD PTR[16+rcx]\n\tsbb\tr9,rbx\n\tmov\trbx,QWORD PTR[24+rcx]\n\tsbb\tr10,rbp\n\tmov\trbp,QWORD PTR[32+rcx]\n\tsbb\tr11,rsi\n\tsbb\trsi,rsi\n\n\tand\tr12,rsi\n\tand\tr13,rsi\n\tand\trax,rsi\n\tand\trbx,rsi\n\tand\trbp,rsi\n\tand\trsi,QWORD PTR[40+rcx]\n\n\tadd\tr14,r12\n\tadc\tr15,r13\n\tadc\tr8,rax\n\tadc\tr9,rbx\n\tadc\tr10,rbp\n\tadc\tr11,rsi\n\n\tmov\tQWORD PTR[rdi],r14\n\tmov\tQWORD PTR[8+rdi],r15\n\tmov\tQWORD PTR[16+rdi],r8\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD 
PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqr_mont_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqr_mont_382x::\nsqr_mont_382x\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_mul_mont_384x\n\tDD\timagerel $L$SEH_body_mul_mont_384x\n\tDD\timagerel $L$SEH_info_mul_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_mul_mont_384x\n\tDD\timagerel $L$SEH_epilogue_mul_mont_384x\n\tDD\timagerel $L$SEH_info_mul_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_mont_384x\n\tDD\timagerel $L$SEH_end_mul_mont_384x\n\tDD\timagerel $L$SEH_info_mul_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_mont_384x\n\tDD\timagerel $L$SEH_body_sqr_mont_384x\n\tDD\timagerel $L$SEH_info_sqr_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_mont_384x\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_384x\n\tDD\timagerel $L$SEH_info_sqr_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_384x\n\tDD\timagerel $L$SEH_end_sqr_mont_384x\n\tDD\timagerel $L$SEH_info_sqr_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_382x\n\tDD\timagerel $L$SEH_body_mul_382x\n\tDD\timagerel $L$SEH_info_mul_382x_prologue\n\n\tDD\timagerel $L$SEH_body_mul_382x\n\tDD\timagerel $L$SEH_epilogue_mul_382x\n\tDD\timagerel $L$SEH_info_mul_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_382x\n\tDD\timagerel $L$SEH_end_mul_382x\n\tDD\timagerel $L$SEH_info_mul_382x_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_382x\n\tDD\timagerel $L$SEH_body_sqr_382x\n\tDD\timagerel $L$SEH_info_sqr_382x_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_382x\n\tDD\timagerel 
$L$SEH_epilogue_sqr_382x\n\tDD\timagerel $L$SEH_info_sqr_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_382x\n\tDD\timagerel $L$SEH_end_sqr_382x\n\tDD\timagerel $L$SEH_info_sqr_382x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_384\n\tDD\timagerel $L$SEH_body_mul_384\n\tDD\timagerel $L$SEH_info_mul_384_prologue\n\n\tDD\timagerel $L$SEH_body_mul_384\n\tDD\timagerel $L$SEH_epilogue_mul_384\n\tDD\timagerel $L$SEH_info_mul_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_384\n\tDD\timagerel $L$SEH_end_mul_384\n\tDD\timagerel $L$SEH_info_mul_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_384\n\tDD\timagerel $L$SEH_body_sqr_384\n\tDD\timagerel $L$SEH_info_sqr_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_384\n\tDD\timagerel $L$SEH_epilogue_sqr_384\n\tDD\timagerel $L$SEH_info_sqr_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_384\n\tDD\timagerel $L$SEH_end_sqr_384\n\tDD\timagerel $L$SEH_info_sqr_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_mont_384\n\tDD\timagerel $L$SEH_body_sqr_mont_384\n\tDD\timagerel $L$SEH_info_sqr_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_mont_384\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_384\n\tDD\timagerel $L$SEH_info_sqr_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_384\n\tDD\timagerel $L$SEH_end_sqr_mont_384\n\tDD\timagerel $L$SEH_info_sqr_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_redc_mont_384\n\tDD\timagerel $L$SEH_body_redc_mont_384\n\tDD\timagerel $L$SEH_info_redc_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_redc_mont_384\n\tDD\timagerel $L$SEH_epilogue_redc_mont_384\n\tDD\timagerel $L$SEH_info_redc_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_redc_mont_384\n\tDD\timagerel $L$SEH_end_redc_mont_384\n\tDD\timagerel $L$SEH_info_redc_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_from_mont_384\n\tDD\timagerel $L$SEH_body_from_mont_384\n\tDD\timagerel $L$SEH_info_from_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_from_mont_384\n\tDD\timagerel $L$SEH_epilogue_from_mont_384\n\tDD\timagerel 
$L$SEH_info_from_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_from_mont_384\n\tDD\timagerel $L$SEH_end_from_mont_384\n\tDD\timagerel $L$SEH_info_from_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_body_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_end_sgn0_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_body_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_end_sgn0_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0_pty_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mul_mont_384\n\tDD\timagerel $L$SEH_body_mul_mont_384\n\tDD\timagerel $L$SEH_info_mul_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_mul_mont_384\n\tDD\timagerel $L$SEH_epilogue_mul_mont_384\n\tDD\timagerel $L$SEH_info_mul_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mul_mont_384\n\tDD\timagerel $L$SEH_end_mul_mont_384\n\tDD\timagerel $L$SEH_info_mul_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_body_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_epilogue_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_end_sqr_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_384_epilogue\n\n\tDD\timagerel 
$L$SEH_begin_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_body_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_383_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_epilogue_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_383_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_end_sqr_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqr_n_mul_mont_383_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqr_mont_382x\n\tDD\timagerel $L$SEH_body_sqr_mont_382x\n\tDD\timagerel $L$SEH_info_sqr_mont_382x_prologue\n\n\tDD\timagerel $L$SEH_body_sqr_mont_382x\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_382x\n\tDD\timagerel $L$SEH_info_sqr_mont_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqr_mont_382x\n\tDD\timagerel $L$SEH_end_sqr_mont_382x\n\tDD\timagerel $L$SEH_info_sqr_mont_382x_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_mul_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_mont_384x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,029h,000h\nDB\t000h,0e4h,02ah,000h\nDB\t000h,0d4h,02bh,000h\nDB\t000h,0c4h,02ch,000h\nDB\t000h,034h,02dh,000h\nDB\t000h,054h,02eh,000h\nDB\t000h,074h,030h,000h\nDB\t000h,064h,031h,000h\nDB\t000h,001h,02fh,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_mont_384x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,0
00h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_382x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_382x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_384_body::\nDB\t1,0,11,0\nDB\t000h,0c4h,000h,000h\nDB\t000h,034h,001h,000h\nDB\t000h,054h,002h,000h\nDB\t000h,074h,004h,000h\nDB\t000h,064h,005h,000h\nDB\t000h,022h\nDB\t000h,000h,000h,000h,000h,000h\n$L$SEH_info_mul_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,0
00h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_mont_384_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,00fh,000h\nDB\t000h,0e4h,010h,000h\nDB\t000h,0d4h,011h,000h\nDB\t000h,0c4h,012h,000h\nDB\t000h,034h,013h,000h\nDB\t000h,054h,014h,000h\nDB\t000h,074h,016h,000h\nDB\t000h,064h,017h,000h\nDB\t000h,001h,015h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_redc_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_redc_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_redc_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_from_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_from_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_from_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0_pty_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_
info_sgn0_pty_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0_pty_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0_pty_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sgn0_pty_mont_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0_pty_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mul_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mul_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,003h,000h\nDB\t000h,0e4h,004h,000h\nDB\t000h,0d4h,005h,000h\nDB\t000h,0c4h,006h,000h\nDB\t000h,034h,007h,000h\nDB\t000h,054h,008h,000h\nDB\t000h,074h,00ah,000h\nDB\t000h,064h,00bh,000h\nDB\t000h,082h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mul_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_n_mul_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_n_mul_mont_384_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000
h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_n_mul_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_n_mul_mont_383_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_n_mul_mont_383_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_n_mul_mont_383_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqr_mont_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqr_mont_382x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqr_mont_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/mulx_mont_256-x86_64.asm",
    "content": "OPTION\tDOTNAME\nPUBLIC\tmul_mont_sparse_256$1\nPUBLIC\tsqr_mont_sparse_256$1\nPUBLIC\tfrom_mont_256$1\nPUBLIC\tredc_mont_256$1\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\nPUBLIC\tmulx_mont_sparse_256\n\n\nALIGN\t32\nmulx_mont_sparse_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mulx_mont_sparse_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nmul_mont_sparse_256$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_mulx_mont_sparse_256::\n\n\n\tmov\trbx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trbp,QWORD PTR[16+rsi]\n\tmov\tr9,QWORD PTR[24+rsi]\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr11,rax,r14\n\tcall\t__mulx_mont_sparse_256\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_mulx_mont_sparse_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mulx_mont_sparse_256::\nmulx_mont_sparse_256\tENDP\n\nPUBLIC\tsqrx_mont_sparse_256\n\n\nALIGN\t32\nsqrx_mont_sparse_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_mont_sparse_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nsqr_mont_sparse_256$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sqrx_mont_sparse_256::\n\n\n\tmov\trbx,rsi\n\tmov\tr8,rcx\n\tmov\trcx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trbp,QWORD PTR[16+rsi]\n\tmov\tr9,QWORD PTR[24+rsi]\n\tlea\trsi,QWORD PTR[((-128))+rbx]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr11,rax,rdx\n\tcall\t__mulx_mont_sparse_256\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sqrx_mont_sparse_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_mont_sparse_256::\nsqrx_mont_sparse_256\tENDP\n\nALIGN\t32\n__mulx_mont_sparse_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmulx\tr12,r15,r15\n\tmulx\tr13,rbp,rbp\n\tadd\tr11,r15\n\tmulx\tr14,r9,r9\n\tmov\trdx,QWORD PTR[8+rbx]\n\tadc\tr12,rbp\n\tadc\tr13,r9\n\tadc\tr14,0\n\n\tmov\tr10,rax\n\timul\trax,r8\n\n\n\txor\tr15,r15\n\tmulx\tr9,rbp,QWORD PTR[((0+128))+rsi]\n\tadox\tr11,rbp\n\tadcx\tr12,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rsi]\n\tadox\tr12,rbp\n\tadcx\tr13,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rsi]\n\tadox\tr13,rbp\n\tadcx\tr14,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rsi]\n\tmov\trdx,rax\n\tadox\tr14,rbp\n\tadcx\tr9,r15\n\tadox\tr15,r9\n\n\n\tmulx\trax,rbp,QWORD PTR[((0+128))+rcx]\n\tadcx\tr10,rbp\n\tadox\trax,r11\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rcx]\n\tadcx\trax,rbp\n\tadox\tr12,r9\n\n\tmulx\tr9,rbp,QWORD 
PTR[((16+128))+rcx]\n\tadcx\tr12,rbp\n\tadox\tr13,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rcx]\n\tmov\trdx,QWORD PTR[16+rbx]\n\tadcx\tr13,rbp\n\tadox\tr14,r9\n\tadcx\tr14,r10\n\tadox\tr15,r10\n\tadcx\tr15,r10\n\tadox\tr10,r10\n\tadc\tr10,0\n\tmov\tr11,rax\n\timul\trax,r8\n\n\n\txor\trbp,rbp\n\tmulx\tr9,rbp,QWORD PTR[((0+128))+rsi]\n\tadox\tr12,rbp\n\tadcx\tr13,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rsi]\n\tadox\tr13,rbp\n\tadcx\tr14,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rsi]\n\tadox\tr14,rbp\n\tadcx\tr15,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rsi]\n\tmov\trdx,rax\n\tadox\tr15,rbp\n\tadcx\tr9,r10\n\tadox\tr10,r9\n\n\n\tmulx\trax,rbp,QWORD PTR[((0+128))+rcx]\n\tadcx\tr11,rbp\n\tadox\trax,r12\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rcx]\n\tadcx\trax,rbp\n\tadox\tr13,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rcx]\n\tadcx\tr13,rbp\n\tadox\tr14,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rcx]\n\tmov\trdx,QWORD PTR[24+rbx]\n\tadcx\tr14,rbp\n\tadox\tr15,r9\n\tadcx\tr15,r11\n\tadox\tr10,r11\n\tadcx\tr10,r11\n\tadox\tr11,r11\n\tadc\tr11,0\n\tmov\tr12,rax\n\timul\trax,r8\n\n\n\txor\trbp,rbp\n\tmulx\tr9,rbp,QWORD PTR[((0+128))+rsi]\n\tadox\tr13,rbp\n\tadcx\tr14,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rsi]\n\tadox\tr14,rbp\n\tadcx\tr15,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rsi]\n\tadox\tr15,rbp\n\tadcx\tr10,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rsi]\n\tmov\trdx,rax\n\tadox\tr10,rbp\n\tadcx\tr9,r11\n\tadox\tr11,r9\n\n\n\tmulx\trax,rbp,QWORD PTR[((0+128))+rcx]\n\tadcx\tr12,rbp\n\tadox\trax,r13\n\n\tmulx\tr9,rbp,QWORD PTR[((8+128))+rcx]\n\tadcx\trax,rbp\n\tadox\tr14,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rcx]\n\tadcx\tr14,rbp\n\tadox\tr15,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rcx]\n\tmov\trdx,rax\n\tadcx\tr15,rbp\n\tadox\tr10,r9\n\tadcx\tr10,r12\n\tadox\tr11,r12\n\tadcx\tr11,r12\n\tadox\tr12,r12\n\tadc\tr12,0\n\timul\trdx,r8\n\n\n\txor\trbp,rbp\n\tmulx\tr9,r13,QWORD PTR[((0+128))+rcx]\n\tadcx\tr13,rax\n\tadox\tr14,r9\n\n\tmulx\tr9,rbp,QWORD 
PTR[((8+128))+rcx]\n\tadcx\tr14,rbp\n\tadox\tr15,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((16+128))+rcx]\n\tadcx\tr15,rbp\n\tadox\tr10,r9\n\n\tmulx\tr9,rbp,QWORD PTR[((24+128))+rcx]\n\tmov\trdx,r14\n\tlea\trcx,QWORD PTR[128+rcx]\n\tadcx\tr10,rbp\n\tadox\tr11,r9\n\tmov\trax,r15\n\tadcx\tr11,r13\n\tadox\tr12,r13\n\tadc\tr12,0\n\n\n\n\n\tmov\trbp,r10\n\tsub\tr14,QWORD PTR[rcx]\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tmov\tr9,r11\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,0\n\n\tcmovc\tr14,rdx\n\tcmovc\tr15,rax\n\tcmovc\tr10,rbp\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr11,r9\n\tmov\tQWORD PTR[8+rdi],r15\n\tmov\tQWORD PTR[16+rdi],r10\n\tmov\tQWORD PTR[24+rdi],r11\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulx_mont_sparse_256\tENDP\nPUBLIC\tfromx_mont_256\n\n\nALIGN\t32\nfromx_mont_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_fromx_mont_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nfrom_mont_256$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_fromx_mont_256::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulx_by_1_mont_256\n\n\n\n\n\n\tmov\trdx,r15\n\tmov\tr12,r10\n\tmov\tr13,r11\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr10,QWORD PTR[16+rbx]\n\tsbb\tr11,QWORD PTR[24+rbx]\n\n\tcmovnc\trax,r14\n\tcmovnc\trdx,r15\n\tcmovnc\tr12,r10\n\tmov\tQWORD PTR[rdi],rax\n\tcmovnc\tr13,r11\n\tmov\tQWORD PTR[8+rdi],rdx\n\tmov\tQWORD PTR[16+rdi],r12\n\tmov\tQWORD PTR[24+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_fromx_mont_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD 
PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_fromx_mont_256::\nfromx_mont_256\tENDP\n\nPUBLIC\tredcx_mont_256\n\n\nALIGN\t32\nredcx_mont_256\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_redcx_mont_256::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nredc_mont_256$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_redcx_mont_256::\n\n\n\tmov\trbx,rdx\n\tcall\t__mulx_by_1_mont_256\n\n\tadd\tr14,QWORD PTR[32+rsi]\n\tadc\tr15,QWORD PTR[40+rsi]\n\tmov\trax,r14\n\tadc\tr10,QWORD PTR[48+rsi]\n\tmov\trdx,r15\n\tadc\tr11,QWORD PTR[56+rsi]\n\tsbb\trsi,rsi\n\n\n\n\n\tmov\tr12,r10\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr10,QWORD PTR[16+rbx]\n\tmov\tr13,r11\n\tsbb\tr11,QWORD PTR[24+rbx]\n\tsbb\trsi,0\n\n\tcmovnc\trax,r14\n\tcmovnc\trdx,r15\n\tcmovnc\tr12,r10\n\tmov\tQWORD PTR[rdi],rax\n\tcmovnc\tr13,r11\n\tmov\tQWORD PTR[8+rdi],rdx\n\tmov\tQWORD PTR[16+rdi],r12\n\tmov\tQWORD PTR[24+rdi],r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_redcx_mont_256::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_redcx_mont_256::\nredcx_mont_256\tENDP\n\nALIGN\t32\n__mulx_by_1_mont_256\tPROC PRIVATE\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trax,QWORD PTR[rsi]\n\tmov\tr11,QWORD PTR[8+rsi]\n\tmov\tr12,QWORD PTR[16+rsi]\n\tmov\tr13,QWORD PTR[24+rsi]\n\n\tmov\tr14,rax\n\timul\trax,rcx\n\tmov\tr10,rax\n\n\tmul\tQWORD 
PTR[rbx]\n\tadd\tr14,rax\n\tmov\trax,r10\n\tadc\tr14,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr11,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr11,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr15,r11\n\timul\tr11,rcx\n\tadd\tr12,rax\n\tmov\trax,r10\n\tadc\trdx,0\n\tadd\tr12,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r14\n\tadc\trdx,0\n\tmov\tr14,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr15,rax\n\tmov\trax,r11\n\tadc\tr15,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr12,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr12,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr10,r12\n\timul\tr12,rcx\n\tadd\tr13,rax\n\tmov\trax,r11\n\tadc\trdx,0\n\tadd\tr13,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r15\n\tadc\trdx,0\n\tmov\tr15,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr10,rax\n\tmov\trax,r12\n\tadc\tr10,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr13,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr13,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tmov\tr11,r13\n\timul\tr13,rcx\n\tadd\tr14,rax\n\tmov\trax,r12\n\tadc\trdx,0\n\tadd\tr14,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,r10\n\tadc\trdx,0\n\tmov\tr10,rdx\n\n\tmul\tQWORD PTR[rbx]\n\tadd\tr11,rax\n\tmov\trax,r13\n\tadc\tr11,rdx\n\n\tmul\tQWORD PTR[8+rbx]\n\tadd\tr14,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr14,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[16+rbx]\n\tadd\tr15,rax\n\tmov\trax,r13\n\tadc\trdx,0\n\tadd\tr15,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\n\tmul\tQWORD PTR[24+rbx]\n\tadd\tr10,rax\n\tmov\trax,r14\n\tadc\trdx,0\n\tadd\tr10,r11\n\tadc\trdx,0\n\tmov\tr11,rdx\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulx_by_1_mont_256\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY 
ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_body_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mulx_mont_sparse_256_prologue\n\n\tDD\timagerel $L$SEH_body_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mulx_mont_sparse_256_body\n\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_end_mulx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_mulx_mont_sparse_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_body_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqrx_mont_sparse_256_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqrx_mont_sparse_256_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_end_sqrx_mont_sparse_256\n\tDD\timagerel $L$SEH_info_sqrx_mont_sparse_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_fromx_mont_256\n\tDD\timagerel $L$SEH_body_fromx_mont_256\n\tDD\timagerel $L$SEH_info_fromx_mont_256_prologue\n\n\tDD\timagerel $L$SEH_body_fromx_mont_256\n\tDD\timagerel $L$SEH_epilogue_fromx_mont_256\n\tDD\timagerel $L$SEH_info_fromx_mont_256_body\n\n\tDD\timagerel $L$SEH_epilogue_fromx_mont_256\n\tDD\timagerel $L$SEH_end_fromx_mont_256\n\tDD\timagerel $L$SEH_info_fromx_mont_256_epilogue\n\n\tDD\timagerel $L$SEH_begin_redcx_mont_256\n\tDD\timagerel $L$SEH_body_redcx_mont_256\n\tDD\timagerel $L$SEH_info_redcx_mont_256_prologue\n\n\tDD\timagerel $L$SEH_body_redcx_mont_256\n\tDD\timagerel $L$SEH_epilogue_redcx_mont_256\n\tDD\timagerel $L$SEH_info_redcx_mont_256_body\n\n\tDD\timagerel $L$SEH_epilogue_redcx_mont_256\n\tDD\timagerel $L$SEH_end_redcx_mont_256\n\tDD\timagerel $L$SEH_info_redcx_mont_256_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY 
ALIGN(8)\nALIGN\t8\n$L$SEH_info_mulx_mont_sparse_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mulx_mont_sparse_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mulx_mont_sparse_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_mont_sparse_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_mont_sparse_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_mont_sparse_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_fromx_mont_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_fromx_mont_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_fromx_mont_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_redcx_mont_256_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_redcx_mont_256_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0
d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_redcx_mont_256_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/mulx_mont_384-x86_64.asm",
    "content": "OPTION\tDOTNAME\nPUBLIC\tmul_mont_384x$1\nPUBLIC\tsqr_mont_384x$1\nPUBLIC\tmul_382x$1\nPUBLIC\tsqr_382x$1\nPUBLIC\tmul_384$1\nPUBLIC\tsqr_384$1\nPUBLIC\tredc_mont_384$1\nPUBLIC\tfrom_mont_384$1\nPUBLIC\tsgn0_pty_mont_384$1\nPUBLIC\tsgn0_pty_mont_384x$1\nPUBLIC\tmul_mont_384$1\nPUBLIC\tsqr_mont_384$1\nPUBLIC\tsqr_n_mul_mont_384$1\nPUBLIC\tsqr_n_mul_mont_383$1\nPUBLIC\tsqr_mont_382x$1\n.text$\tSEGMENT ALIGN(256) 'CODE'\n\n\n\n\n\n\n\n\nALIGN\t32\n__subx_mod_384x384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tmov\tr14,QWORD PTR[48+rsi]\n\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr15,QWORD PTR[56+rsi]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\trax,QWORD PTR[64+rsi]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trbx,QWORD PTR[72+rsi]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbp,QWORD PTR[80+rsi]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trsi,QWORD PTR[88+rsi]\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tmov\tQWORD PTR[rdi],r8\n\tsbb\tr14,QWORD PTR[48+rdx]\n\tmov\tr8,QWORD PTR[rcx]\n\tmov\tQWORD PTR[8+rdi],r9\n\tsbb\tr15,QWORD PTR[56+rdx]\n\tmov\tr9,QWORD PTR[8+rcx]\n\tmov\tQWORD PTR[16+rdi],r10\n\tsbb\trax,QWORD PTR[64+rdx]\n\tmov\tr10,QWORD PTR[16+rcx]\n\tmov\tQWORD PTR[24+rdi],r11\n\tsbb\trbx,QWORD PTR[72+rdx]\n\tmov\tr11,QWORD PTR[24+rcx]\n\tmov\tQWORD PTR[32+rdi],r12\n\tsbb\trbp,QWORD PTR[80+rdx]\n\tmov\tr12,QWORD PTR[32+rcx]\n\tmov\tQWORD PTR[40+rdi],r13\n\tsbb\trsi,QWORD PTR[88+rdx]\n\tmov\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr8,rdx\n\tand\tr9,rdx\n\tand\tr10,rdx\n\tand\tr11,rdx\n\tand\tr12,rdx\n\tand\tr13,rdx\n\n\tadd\tr14,r8\n\tadc\tr15,r9\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\trax,r10\n\tmov\tQWORD PTR[56+rdi],r15\n\tadc\trbx,r11\n\tmov\tQWORD PTR[64+rdi],rax\n\tadc\trbp,r12\n\tmov\tQWORD PTR[72+rdi],rbx\n\tadc\trsi,r13\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD 
PTR[88+rdi],rsi\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__subx_mod_384x384\tENDP\n\n\nALIGN\t32\n__addx_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr8,QWORD PTR[rdx]\n\tadc\tr9,QWORD PTR[8+rdx]\n\tadc\tr10,QWORD PTR[16+rdx]\n\tmov\tr14,r8\n\tadc\tr11,QWORD PTR[24+rdx]\n\tmov\tr15,r9\n\tadc\tr12,QWORD PTR[32+rdx]\n\tmov\trax,r10\n\tadc\tr13,QWORD PTR[40+rdx]\n\tmov\trbx,r11\n\tsbb\trdx,rdx\n\n\tsub\tr8,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rcx]\n\tmov\trbp,r12\n\tsbb\tr10,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rcx]\n\tmov\trsi,r13\n\tsbb\tr13,QWORD PTR[40+rcx]\n\tsbb\trdx,0\n\n\tcmovc\tr8,r14\n\tcmovc\tr9,r15\n\tcmovc\tr10,rax\n\tmov\tQWORD PTR[rdi],r8\n\tcmovc\tr11,rbx\n\tmov\tQWORD PTR[8+rdi],r9\n\tcmovc\tr12,rbp\n\tmov\tQWORD PTR[16+rdi],r10\n\tcmovc\tr13,rsi\n\tmov\tQWORD PTR[24+rdi],r11\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__addx_mod_384\tENDP\n\n\nALIGN\t32\n__subx_mod_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n__subx_mod_384_a_is_loaded::\n\tsub\tr8,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rcx]\n\tsbb\tr9,QWORD PTR[8+rdx]\n\tmov\tr15,QWORD PTR[8+rcx]\n\tsbb\tr10,QWORD PTR[16+rdx]\n\tmov\trax,QWORD PTR[16+rcx]\n\tsbb\tr11,QWORD PTR[24+rdx]\n\tmov\trbx,QWORD PTR[24+rcx]\n\tsbb\tr12,QWORD PTR[32+rdx]\n\tmov\trbp,QWORD PTR[32+rcx]\n\tsbb\tr13,QWORD PTR[40+rdx]\n\tmov\trsi,QWORD 
PTR[40+rcx]\n\tsbb\trdx,rdx\n\n\tand\tr14,rdx\n\tand\tr15,rdx\n\tand\trax,rdx\n\tand\trbx,rdx\n\tand\trbp,rdx\n\tand\trsi,rdx\n\n\tadd\tr8,r14\n\tadc\tr9,r15\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,rax\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,rbx\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\tr12,rbp\n\tmov\tQWORD PTR[24+rdi],r11\n\tadc\tr13,rsi\n\tmov\tQWORD PTR[32+rdi],r12\n\tmov\tQWORD PTR[40+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__subx_mod_384\tENDP\nPUBLIC\tmulx_mont_384x\n\n\nALIGN\t32\nmulx_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mulx_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nmul_mont_384x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,328\n\n$L$SEH_body_mulx_mont_384x::\n\n\n\tmov\trbx,rdx\n\tmov\tQWORD PTR[32+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rsi\n\tmov\tQWORD PTR[16+rsp],rdx\n\tmov\tQWORD PTR[8+rsp],rcx\n\tmov\tQWORD PTR[rsp],r8\n\n\n\n\n\tlea\trdi,QWORD PTR[40+rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_384\n\n\n\tlea\trbx,QWORD PTR[48+rbx]\n\tlea\trsi,QWORD PTR[((128+48))+rsi]\n\tlea\trdi,QWORD PTR[96+rdi]\n\tcall\t__mulx_384\n\n\n\tmov\trcx,QWORD PTR[8+rsp]\n\tlea\trsi,QWORD PTR[rbx]\n\tlea\trdx,QWORD PTR[((-48))+rbx]\n\tlea\trdi,QWORD PTR[((40+192+48))+rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__addx_mod_384\n\n\tmov\trsi,QWORD PTR[24+rsp]\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__addx_mod_384\n\n\tlea\trbx,QWORD PTR[rdi]\n\tlea\trsi,QWORD PTR[48+rdi]\n\tcall\t__mulx_384\n\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trdx,QWORD PTR[40+rsp]\n\tmov\trcx,QWORD 
PTR[8+rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__subx_mod_384x384\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trdx,QWORD PTR[((-96))+rdi]\n\tcall\t__subx_mod_384x384\n\n\n\tlea\trsi,QWORD PTR[40+rsp]\n\tlea\trdx,QWORD PTR[((40+96))+rsp]\n\tlea\trdi,QWORD PTR[40+rsp]\n\tcall\t__subx_mod_384x384\n\n\tlea\trbx,QWORD PTR[rcx]\n\n\n\tlea\trsi,QWORD PTR[40+rsp]\n\tmov\trcx,QWORD PTR[rsp]\n\tmov\trdi,QWORD PTR[32+rsp]\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\n\tlea\trsi,QWORD PTR[((40+192))+rsp]\n\tmov\trcx,QWORD PTR[rsp]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tlea\tr8,QWORD PTR[328+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_mulx_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mulx_mont_384x::\nmulx_mont_384x\tENDP\nPUBLIC\tsqrx_mont_384x\n\n\nALIGN\t32\nsqrx_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nsqr_mont_384x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_sqrx_mont_384x::\n\n\n\tmov\tQWORD PTR[rsp],rcx\n\tmov\trcx,rdx\n\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rsi\n\n\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[32+rsp]\n\tcall\t__addx_mod_384\n\n\n\tmov\trsi,QWORD PTR[24+rsp]\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[((32+48))+rsp]\n\tcall\t__subx_mod_384\n\n\n\tmov\trsi,QWORD PTR[24+rsp]\n\tlea\trbx,QWORD 
PTR[48+rsi]\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[48+rsi]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_384\n\tadd\trdx,rdx\n\tadc\tr15,r15\n\tadc\trax,rax\n\tmov\tr8,rdx\n\tadc\tr12,r12\n\tmov\tr9,r15\n\tadc\trdi,rdi\n\tmov\tr10,rax\n\tadc\trbp,rbp\n\tmov\tr11,r12\n\tsbb\trsi,rsi\n\n\tsub\trdx,QWORD PTR[rcx]\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tmov\tr13,rdi\n\tsbb\trax,QWORD PTR[16+rcx]\n\tsbb\tr12,QWORD PTR[24+rcx]\n\tsbb\trdi,QWORD PTR[32+rcx]\n\tmov\tr14,rbp\n\tsbb\trbp,QWORD PTR[40+rcx]\n\tsbb\trsi,0\n\n\tcmovc\trdx,r8\n\tcmovc\tr15,r9\n\tcmovc\trax,r10\n\tmov\tQWORD PTR[48+rbx],rdx\n\tcmovc\tr12,r11\n\tmov\tQWORD PTR[56+rbx],r15\n\tcmovc\trdi,r13\n\tmov\tQWORD PTR[64+rbx],rax\n\tcmovc\trbp,r14\n\tmov\tQWORD PTR[72+rbx],r12\n\tmov\tQWORD PTR[80+rbx],rdi\n\tmov\tQWORD PTR[88+rbx],rbp\n\n\tlea\trsi,QWORD PTR[32+rsp]\n\tlea\trbx,QWORD PTR[((32+48))+rsp]\n\n\tmov\trdx,QWORD PTR[((32+48))+rsp]\n\tmov\tr14,QWORD PTR[((32+0))+rsp]\n\tmov\tr15,QWORD PTR[((32+8))+rsp]\n\tmov\trax,QWORD PTR[((32+16))+rsp]\n\tmov\tr12,QWORD PTR[((32+24))+rsp]\n\tmov\trdi,QWORD PTR[((32+32))+rsp]\n\tmov\trbp,QWORD PTR[((32+40))+rsp]\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqrx_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD 
PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_mont_384x::\nsqrx_mont_384x\tENDP\n\nPUBLIC\tmulx_382x\n\n\nALIGN\t32\nmulx_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mulx_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nmul_382x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_mulx_382x::\n\n\n\tlea\trdi,QWORD PTR[96+rdi]\n\tmov\tQWORD PTR[rsp],rsi\n\tmov\tQWORD PTR[8+rsp],rdx\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rcx\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tadd\tr8,QWORD PTR[48+rsi]\n\tadc\tr9,QWORD PTR[56+rsi]\n\tadc\tr10,QWORD PTR[64+rsi]\n\tadc\tr11,QWORD PTR[72+rsi]\n\tadc\tr12,QWORD PTR[80+rsi]\n\tadc\tr13,QWORD PTR[88+rsi]\n\n\tmov\tQWORD PTR[((32+0))+rsp],r8\n\tmov\tQWORD PTR[((32+8))+rsp],r9\n\tmov\tQWORD PTR[((32+16))+rsp],r10\n\tmov\tQWORD PTR[((32+24))+rsp],r11\n\tmov\tQWORD PTR[((32+32))+rsp],r12\n\tmov\tQWORD PTR[((32+40))+rsp],r13\n\n\n\tmov\tr8,QWORD PTR[rdx]\n\tmov\tr9,QWORD PTR[8+rdx]\n\tmov\tr10,QWORD PTR[16+rdx]\n\tmov\tr11,QWORD PTR[24+rdx]\n\tmov\tr12,QWORD PTR[32+rdx]\n\tmov\tr13,QWORD PTR[40+rdx]\n\n\tadd\tr8,QWORD PTR[48+rdx]\n\tadc\tr9,QWORD PTR[56+rdx]\n\tadc\tr10,QWORD PTR[64+rdx]\n\tadc\tr11,QWORD PTR[72+rdx]\n\tadc\tr12,QWORD PTR[80+rdx]\n\tadc\tr13,QWORD PTR[88+rdx]\n\n\tmov\tQWORD PTR[((32+48))+rsp],r8\n\tmov\tQWORD PTR[((32+56))+rsp],r9\n\tmov\tQWORD PTR[((32+64))+rsp],r10\n\tmov\tQWORD PTR[((32+72))+rsp],r11\n\tmov\tQWORD PTR[((32+80))+rsp],r12\n\tmov\tQWORD PTR[((32+88))+rsp],r13\n\n\n\tlea\trsi,QWORD PTR[((32+0))+rsp]\n\tlea\trbx,QWORD 
PTR[((32+48))+rsp]\n\tcall\t__mulx_384\n\n\n\tmov\trsi,QWORD PTR[rsp]\n\tmov\trbx,QWORD PTR[8+rsp]\n\tlea\trdi,QWORD PTR[((-96))+rdi]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_384\n\n\n\tlea\trsi,QWORD PTR[((48+128))+rsi]\n\tlea\trbx,QWORD PTR[48+rbx]\n\tlea\trdi,QWORD PTR[32+rsp]\n\tcall\t__mulx_384\n\n\n\tmov\trsi,QWORD PTR[16+rsp]\n\tlea\trdx,QWORD PTR[32+rsp]\n\tmov\trcx,QWORD PTR[24+rsp]\n\tmov\trdi,rsi\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__subx_mod_384x384\n\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trdx,QWORD PTR[((-96))+rdi]\n\tcall\t__subx_mod_384x384\n\n\n\tlea\trsi,QWORD PTR[((-96))+rdi]\n\tlea\trdx,QWORD PTR[32+rsp]\n\tlea\trdi,QWORD PTR[((-96))+rdi]\n\tcall\t__subx_mod_384x384\n\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_mulx_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mulx_382x::\nmulx_382x\tENDP\nPUBLIC\tsqrx_382x\n\n\nALIGN\t32\nsqrx_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nsqr_382x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trsi\n\n$L$SEH_body_sqrx_382x::\n\n\n\tmov\trcx,rdx\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\trbx,QWORD PTR[24+rsi]\n\tmov\trbp,QWORD PTR[32+rsi]\n\tmov\trdx,QWORD PTR[40+rsi]\n\n\tmov\tr8,r14\n\tadd\tr14,QWORD PTR[48+rsi]\n\tmov\tr9,r15\n\tadc\tr15,QWORD PTR[56+rsi]\n\tmov\tr10,rax\n\tadc\trax,QWORD 
PTR[64+rsi]\n\tmov\tr11,rbx\n\tadc\trbx,QWORD PTR[72+rsi]\n\tmov\tr12,rbp\n\tadc\trbp,QWORD PTR[80+rsi]\n\tmov\tr13,rdx\n\tadc\trdx,QWORD PTR[88+rsi]\n\n\tmov\tQWORD PTR[rdi],r14\n\tmov\tQWORD PTR[8+rdi],r15\n\tmov\tQWORD PTR[16+rdi],rax\n\tmov\tQWORD PTR[24+rdi],rbx\n\tmov\tQWORD PTR[32+rdi],rbp\n\tmov\tQWORD PTR[40+rdi],rdx\n\n\n\tlea\trdx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[48+rdi]\n\tcall\t__subx_mod_384_a_is_loaded\n\n\n\tlea\trsi,QWORD PTR[rdi]\n\tlea\trbx,QWORD PTR[((-48))+rdi]\n\tlea\trdi,QWORD PTR[((-48))+rdi]\n\tcall\t__mulx_384\n\n\n\tmov\trsi,QWORD PTR[rsp]\n\tlea\trbx,QWORD PTR[48+rsi]\n\tlea\trdi,QWORD PTR[96+rdi]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_384\n\n\tmov\tr8,QWORD PTR[rdi]\n\tmov\tr9,QWORD PTR[8+rdi]\n\tmov\tr10,QWORD PTR[16+rdi]\n\tmov\tr11,QWORD PTR[24+rdi]\n\tmov\tr12,QWORD PTR[32+rdi]\n\tmov\tr13,QWORD PTR[40+rdi]\n\tmov\tr14,QWORD PTR[48+rdi]\n\tmov\tr15,QWORD PTR[56+rdi]\n\tmov\trax,QWORD PTR[64+rdi]\n\tmov\trbx,QWORD PTR[72+rdi]\n\tmov\trbp,QWORD PTR[80+rdi]\n\tadd\tr8,r8\n\tmov\trdx,QWORD PTR[88+rdi]\n\tadc\tr9,r9\n\tmov\tQWORD PTR[rdi],r8\n\tadc\tr10,r10\n\tmov\tQWORD PTR[8+rdi],r9\n\tadc\tr11,r11\n\tmov\tQWORD PTR[16+rdi],r10\n\tadc\tr12,r12\n\tmov\tQWORD PTR[24+rdi],r11\n\tadc\tr13,r13\n\tmov\tQWORD PTR[32+rdi],r12\n\tadc\tr14,r14\n\tmov\tQWORD PTR[40+rdi],r13\n\tadc\tr15,r15\n\tmov\tQWORD PTR[48+rdi],r14\n\tadc\trax,rax\n\tmov\tQWORD PTR[56+rdi],r15\n\tadc\trbx,rbx\n\tmov\tQWORD PTR[64+rdi],rax\n\tadc\trbp,rbp\n\tmov\tQWORD PTR[72+rdi],rbx\n\tadc\trdx,rdx\n\tmov\tQWORD PTR[80+rdi],rbp\n\tmov\tQWORD PTR[88+rdi],rdx\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sqrx_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD 
PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_382x::\nsqrx_382x\tENDP\nPUBLIC\tmulx_384\n\n\nALIGN\t32\nmulx_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mulx_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nmul_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n$L$SEH_body_mulx_384::\n\n\n\tmov\trbx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_384\n\n\tmov\tr15,QWORD PTR[rsp]\n\n\tmov\tr14,QWORD PTR[8+rsp]\n\n\tmov\tr13,QWORD PTR[16+rsp]\n\n\tmov\tr12,QWORD PTR[24+rsp]\n\n\tmov\trbx,QWORD PTR[32+rsp]\n\n\tmov\trbp,QWORD PTR[40+rsp]\n\n\tlea\trsp,QWORD PTR[48+rsp]\n\n$L$SEH_epilogue_mulx_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mulx_384::\nmulx_384\tENDP\n\n\nALIGN\t32\n__mulx_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trdx,QWORD PTR[rbx]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\n\tmulx\trcx,r9,r14\n\txor\trbp,rbp\n\n\tmulx\trax,r8,r15\n\tadcx\tr8,rcx\n\tmov\tQWORD PTR[rdi],r9\n\n\tmulx\trcx,r9,r10\n\tadcx\tr9,rax\n\n\tmulx\trax,r10,r11\n\tadcx\tr10,rcx\n\n\tmulx\trcx,r11,r12\n\tadcx\tr11,rax\n\n\tmulx\tr13,r12,r13\n\tmov\trdx,QWORD PTR[8+rbx]\n\tadcx\tr12,rcx\n\tadcx\tr13,rbp\n\tmulx\trcx,rax,r14\n\tadcx\trax,r8\n\tadox\tr9,rcx\n\tmov\tQWORD PTR[8+rdi],rax\n\n\tmulx\trcx,r8,r15\n\tadcx\tr8,r9\n\tadox\tr10,rcx\n\n\tmulx\trax,r9,QWORD PTR[((128+16))+rsi]\n\tadcx\tr9,r10\n\tadox\tr11,rax\n\n\tmulx\trcx,r10,QWORD 
PTR[((128+24))+rsi]\n\tadcx\tr10,r11\n\tadox\tr12,rcx\n\n\tmulx\trax,r11,QWORD PTR[((128+32))+rsi]\n\tadcx\tr11,r12\n\tadox\trax,r13\n\n\tmulx\tr13,r12,QWORD PTR[((128+40))+rsi]\n\tmov\trdx,QWORD PTR[16+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\tadcx\tr13,rbp\n\tmulx\trcx,rax,r14\n\tadcx\trax,r8\n\tadox\tr9,rcx\n\tmov\tQWORD PTR[16+rdi],rax\n\n\tmulx\trcx,r8,r15\n\tadcx\tr8,r9\n\tadox\tr10,rcx\n\n\tmulx\trax,r9,QWORD PTR[((128+16))+rsi]\n\tadcx\tr9,r10\n\tadox\tr11,rax\n\n\tmulx\trcx,r10,QWORD PTR[((128+24))+rsi]\n\tadcx\tr10,r11\n\tadox\tr12,rcx\n\n\tmulx\trax,r11,QWORD PTR[((128+32))+rsi]\n\tadcx\tr11,r12\n\tadox\trax,r13\n\n\tmulx\tr13,r12,QWORD PTR[((128+40))+rsi]\n\tmov\trdx,QWORD PTR[24+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\tadcx\tr13,rbp\n\tmulx\trcx,rax,r14\n\tadcx\trax,r8\n\tadox\tr9,rcx\n\tmov\tQWORD PTR[24+rdi],rax\n\n\tmulx\trcx,r8,r15\n\tadcx\tr8,r9\n\tadox\tr10,rcx\n\n\tmulx\trax,r9,QWORD PTR[((128+16))+rsi]\n\tadcx\tr9,r10\n\tadox\tr11,rax\n\n\tmulx\trcx,r10,QWORD PTR[((128+24))+rsi]\n\tadcx\tr10,r11\n\tadox\tr12,rcx\n\n\tmulx\trax,r11,QWORD PTR[((128+32))+rsi]\n\tadcx\tr11,r12\n\tadox\trax,r13\n\n\tmulx\tr13,r12,QWORD PTR[((128+40))+rsi]\n\tmov\trdx,QWORD PTR[32+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\tadcx\tr13,rbp\n\tmulx\trcx,rax,r14\n\tadcx\trax,r8\n\tadox\tr9,rcx\n\tmov\tQWORD PTR[32+rdi],rax\n\n\tmulx\trcx,r8,r15\n\tadcx\tr8,r9\n\tadox\tr10,rcx\n\n\tmulx\trax,r9,QWORD PTR[((128+16))+rsi]\n\tadcx\tr9,r10\n\tadox\tr11,rax\n\n\tmulx\trcx,r10,QWORD PTR[((128+24))+rsi]\n\tadcx\tr10,r11\n\tadox\tr12,rcx\n\n\tmulx\trax,r11,QWORD PTR[((128+32))+rsi]\n\tadcx\tr11,r12\n\tadox\trax,r13\n\n\tmulx\tr13,r12,QWORD PTR[((128+40))+rsi]\n\tmov\trdx,QWORD PTR[40+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\tadcx\tr13,rbp\n\tmulx\trcx,rax,r14\n\tadcx\trax,r8\n\tadox\tr9,rcx\n\tmov\tQWORD PTR[40+rdi],rax\n\n\tmulx\trcx,r8,r15\n\tadcx\tr8,r9\n\tadox\tr10,rcx\n\n\tmulx\trax,r9,QWORD PTR[((128+16))+rsi]\n\tadcx\tr9,r10\n\tadox\tr11,rax\n\n\tmulx\trcx,r10,QWORD 
PTR[((128+24))+rsi]\n\tadcx\tr10,r11\n\tadox\tr12,rcx\n\n\tmulx\trax,r11,QWORD PTR[((128+32))+rsi]\n\tadcx\tr11,r12\n\tadox\trax,r13\n\n\tmulx\tr13,r12,QWORD PTR[((128+40))+rsi]\n\tmov\trdx,rax\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\tadcx\tr13,rbp\n\tmov\tQWORD PTR[48+rdi],r8\n\tmov\tQWORD PTR[56+rdi],r9\n\tmov\tQWORD PTR[64+rdi],r10\n\tmov\tQWORD PTR[72+rdi],r11\n\tmov\tQWORD PTR[80+rdi],r12\n\tmov\tQWORD PTR[88+rdi],r13\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulx_384\tENDP\nPUBLIC\tsqrx_384\n\n\nALIGN\t32\nsqrx_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\nsqr_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tpush\trdi\n\n$L$SEH_body_sqrx_384::\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__sqrx_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sqrx_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_384::\nsqrx_384\tENDP\n\nALIGN\t32\n__sqrx_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr14,QWORD PTR[8+rsi]\n\tmov\tr15,QWORD PTR[16+rsi]\n\tmov\trcx,QWORD PTR[24+rsi]\n\tmov\trbx,QWORD PTR[32+rsi]\n\n\n\tmulx\trdi,r8,r14\n\tmov\trbp,QWORD 
PTR[40+rsi]\n\tmulx\trax,r9,r15\n\tadd\tr9,rdi\n\tmulx\trdi,r10,rcx\n\tadc\tr10,rax\n\tmulx\trax,r11,rbx\n\tadc\tr11,rdi\n\tmulx\tr13,r12,rbp\n\tmov\trdx,r14\n\tadc\tr12,rax\n\tadc\tr13,0\n\n\n\txor\tr14,r14\n\tmulx\trax,rdi,r15\n\tadcx\tr10,rdi\n\tadox\tr11,rax\n\n\tmulx\trax,rdi,rcx\n\tadcx\tr11,rdi\n\tadox\tr12,rax\n\n\tmulx\trax,rdi,rbx\n\tadcx\tr12,rdi\n\tadox\tr13,rax\n\n\tmulx\trax,rdi,rbp\n\tmov\trdx,r15\n\tadcx\tr13,rdi\n\tadox\trax,r14\n\tadcx\tr14,rax\n\n\n\txor\tr15,r15\n\tmulx\trax,rdi,rcx\n\tadcx\tr12,rdi\n\tadox\tr13,rax\n\n\tmulx\trax,rdi,rbx\n\tadcx\tr13,rdi\n\tadox\tr14,rax\n\n\tmulx\trax,rdi,rbp\n\tmov\trdx,rcx\n\tadcx\tr14,rdi\n\tadox\trax,r15\n\tadcx\tr15,rax\n\n\n\txor\trcx,rcx\n\tmulx\trax,rdi,rbx\n\tadcx\tr14,rdi\n\tadox\tr15,rax\n\n\tmulx\trax,rdi,rbp\n\tmov\trdx,rbx\n\tadcx\tr15,rdi\n\tadox\trax,rcx\n\tadcx\trcx,rax\n\n\n\tmulx\trbx,rdi,rbp\n\tmov\trdx,QWORD PTR[rsi]\n\tadd\trcx,rdi\n\tmov\trdi,QWORD PTR[8+rsp]\n\tadc\trbx,0\n\n\n\txor\trbp,rbp\n\tadcx\tr8,r8\n\tadcx\tr9,r9\n\tadcx\tr10,r10\n\tadcx\tr11,r11\n\tadcx\tr12,r12\n\n\n\tmulx\trax,rdx,rdx\n\tmov\tQWORD PTR[rdi],rdx\n\tmov\trdx,QWORD PTR[8+rsi]\n\tadox\tr8,rax\n\tmov\tQWORD PTR[8+rdi],r8\n\n\tmulx\trax,r8,rdx\n\tmov\trdx,QWORD PTR[16+rsi]\n\tadox\tr9,r8\n\tadox\tr10,rax\n\tmov\tQWORD PTR[16+rdi],r9\n\tmov\tQWORD PTR[24+rdi],r10\n\n\tmulx\tr9,r8,rdx\n\tmov\trdx,QWORD PTR[24+rsi]\n\tadox\tr11,r8\n\tadox\tr12,r9\n\tadcx\tr13,r13\n\tadcx\tr14,r14\n\tmov\tQWORD PTR[32+rdi],r11\n\tmov\tQWORD PTR[40+rdi],r12\n\n\tmulx\tr9,r8,rdx\n\tmov\trdx,QWORD PTR[32+rsi]\n\tadox\tr13,r8\n\tadox\tr14,r9\n\tadcx\tr15,r15\n\tadcx\trcx,rcx\n\tmov\tQWORD PTR[48+rdi],r13\n\tmov\tQWORD PTR[56+rdi],r14\n\n\tmulx\tr9,r8,rdx\n\tmov\trdx,QWORD PTR[40+rsi]\n\tadox\tr15,r8\n\tadox\trcx,r9\n\tadcx\trbx,rbx\n\tadcx\trbp,rbp\n\tmov\tQWORD PTR[64+rdi],r15\n\tmov\tQWORD PTR[72+rdi],rcx\n\n\tmulx\tr9,r8,rdx\n\tadox\trbx,r8\n\tadox\trbp,r9\n\n\tmov\tQWORD PTR[80+rdi],rbx\n\tmov\tQWORD 
PTR[88+rdi],rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__sqrx_384\tENDP\n\n\n\nPUBLIC\tredcx_mont_384\n\n\nALIGN\t32\nredcx_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_redcx_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nredc_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_redcx_mont_384::\n\n\n\tmov\trbx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_redcx_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_redcx_mont_384::\nredcx_mont_384\tENDP\n\n\n\n\nPUBLIC\tfromx_mont_384\n\n\nALIGN\t32\nfromx_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_fromx_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nfrom_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_fromx_mont_384::\n\n\n\tmov\trbx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_by_1_mont_384\n\n\n\n\n\tmov\trax,r14\n\tmov\trcx,r15\n\tmov\trdx,r8\n\tmov\trbp,r9\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tmov\tr13,r10\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD 
PTR[32+rbx]\n\tmov\trsi,r11\n\tsbb\tr11,QWORD PTR[40+rbx]\n\n\tcmovc\tr14,rax\n\tcmovc\tr15,rcx\n\tcmovc\tr8,rdx\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr9,rbp\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovc\tr10,r13\n\tmov\tQWORD PTR[16+rdi],r8\n\tcmovc\tr11,rsi\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_fromx_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_fromx_mont_384::\nfromx_mont_384\tENDP\n\nALIGN\t32\n__mulx_by_1_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\trdx,rcx\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\timul\trdx,r8\n\n\n\txor\tr14,r14\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr8,rax\n\tadox\tr9,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr9,rax\n\tadox\tr10,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr10,rax\n\tadox\tr11,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr11,rax\n\tadox\tr12,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\n\tmulx\trbp,rax,QWORD PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr13,rax\n\tadox\trbp,r14\n\tadcx\tr14,rbp\n\timul\trdx,r9\n\n\n\txor\tr15,r15\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr9,rax\n\tadox\tr10,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr10,rax\n\tadox\tr11,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr11,rax\n\tadox\tr12,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr13,rax\n\tadox\tr14,rbp\n\n\tmulx\trbp,rax,QWORD 
PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr14,rax\n\tadox\trbp,r15\n\tadcx\tr15,rbp\n\timul\trdx,r10\n\n\n\txor\tr8,r8\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr10,rax\n\tadox\tr11,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr11,rax\n\tadox\tr12,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr13,rax\n\tadox\tr14,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr14,rax\n\tadox\tr15,rbp\n\n\tmulx\trbp,rax,QWORD PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr15,rax\n\tadox\trbp,r8\n\tadcx\tr8,rbp\n\timul\trdx,r11\n\n\n\txor\tr9,r9\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr11,rax\n\tadox\tr12,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr13,rax\n\tadox\tr14,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr14,rax\n\tadox\tr15,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr15,rax\n\tadox\tr8,rbp\n\n\tmulx\trbp,rax,QWORD PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr8,rax\n\tadox\trbp,r9\n\tadcx\tr9,rbp\n\timul\trdx,r12\n\n\n\txor\tr10,r10\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr12,rax\n\tadox\tr13,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr13,rax\n\tadox\tr14,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr14,rax\n\tadox\tr15,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr15,rax\n\tadox\tr8,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr8,rax\n\tadox\tr9,rbp\n\n\tmulx\trbp,rax,QWORD PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr9,rax\n\tadox\trbp,r10\n\tadcx\tr10,rbp\n\timul\trdx,r13\n\n\n\txor\tr11,r11\n\tmulx\trbp,rax,QWORD PTR[rbx]\n\tadcx\tr13,rax\n\tadox\tr14,rbp\n\n\tmulx\trbp,rax,QWORD PTR[8+rbx]\n\tadcx\tr14,rax\n\tadox\tr15,rbp\n\n\tmulx\trbp,rax,QWORD PTR[16+rbx]\n\tadcx\tr15,rax\n\tadox\tr8,rbp\n\n\tmulx\trbp,rax,QWORD PTR[24+rbx]\n\tadcx\tr8,rax\n\tadox\tr9,rbp\n\n\tmulx\trbp,rax,QWORD PTR[32+rbx]\n\tadcx\tr9,rax\n\tadox\tr10,rbp\n\n\tmulx\trbp,rax,QWORD 
PTR[40+rbx]\n\tmov\trdx,rcx\n\tadcx\tr10,rax\n\tadox\trbp,r11\n\tadcx\tr11,rbp\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__mulx_by_1_mont_384\tENDP\n\n\nALIGN\t32\n__redx_tail_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\tadd\tr14,QWORD PTR[48+rsi]\n\tmov\trax,r14\n\tadc\tr15,QWORD PTR[56+rsi]\n\tadc\tr8,QWORD PTR[64+rsi]\n\tadc\tr9,QWORD PTR[72+rsi]\n\tmov\trcx,r15\n\tadc\tr10,QWORD PTR[80+rsi]\n\tadc\tr11,QWORD PTR[88+rsi]\n\tsbb\tr12,r12\n\n\n\n\n\tmov\trdx,r8\n\tmov\trbp,r9\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tmov\tr13,r10\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tmov\trsi,r11\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\tr12,0\n\n\tcmovc\tr14,rax\n\tcmovc\tr15,rcx\n\tcmovc\tr8,rdx\n\tmov\tQWORD PTR[rdi],r14\n\tcmovc\tr9,rbp\n\tmov\tQWORD PTR[8+rdi],r15\n\tcmovc\tr10,r13\n\tmov\tQWORD PTR[16+rdi],r8\n\tcmovc\tr11,rsi\n\tmov\tQWORD PTR[24+rdi],r9\n\tmov\tQWORD PTR[32+rdi],r10\n\tmov\tQWORD PTR[40+rdi],r11\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n__redx_tail_mont_384\tENDP\n\nPUBLIC\tsgn0x_pty_mont_384\n\n\nALIGN\t32\nsgn0x_pty_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0x_pty_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nsgn0_pty_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sgn0x_pty_mont_384::\n\n\n\tmov\trbx,rsi\n\tlea\trsi,QWORD PTR[rdi]\n\tmov\trcx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_by_1_mont_384\n\n\txor\trax,rax\n\tmov\tr13,r14\n\tadd\tr14,r14\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trax,0\n\n\tsub\tr14,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD 
PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trax,0\n\n\tnot\trax\n\tand\tr13,1\n\tand\trax,2\n\tor\trax,r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sgn0x_pty_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0x_pty_mont_384::\nsgn0x_pty_mont_384\tENDP\n\nPUBLIC\tsgn0x_pty_mont_384x\n\n\nALIGN\t32\nsgn0x_pty_mont_384x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sgn0x_pty_mont_384x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nsgn0_pty_mont_384x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,8\n\n$L$SEH_body_sgn0x_pty_mont_384x::\n\n\n\tmov\trbx,rsi\n\tlea\trsi,QWORD PTR[48+rdi]\n\tmov\trcx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tcall\t__mulx_by_1_mont_384\n\n\tmov\tr12,r14\n\tor\tr14,r15\n\tor\tr14,r8\n\tor\tr14,r9\n\tor\tr14,r10\n\tor\tr14,r11\n\n\tlea\trsi,QWORD PTR[rdi]\n\txor\trdi,rdi\n\tmov\tr13,r12\n\tadd\tr12,r12\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trdi,0\n\n\tsub\tr12,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trdi,0\n\n\tmov\tQWORD 
PTR[rsp],r14\n\tnot\trdi\n\tand\tr13,1\n\tand\trdi,2\n\tor\trdi,r13\n\n\tcall\t__mulx_by_1_mont_384\n\n\tmov\tr12,r14\n\tor\tr14,r15\n\tor\tr14,r8\n\tor\tr14,r9\n\tor\tr14,r10\n\tor\tr14,r11\n\n\txor\trax,rax\n\tmov\tr13,r12\n\tadd\tr12,r12\n\tadc\tr15,r15\n\tadc\tr8,r8\n\tadc\tr9,r9\n\tadc\tr10,r10\n\tadc\tr11,r11\n\tadc\trax,0\n\n\tsub\tr12,QWORD PTR[rbx]\n\tsbb\tr15,QWORD PTR[8+rbx]\n\tsbb\tr8,QWORD PTR[16+rbx]\n\tsbb\tr9,QWORD PTR[24+rbx]\n\tsbb\tr10,QWORD PTR[32+rbx]\n\tsbb\tr11,QWORD PTR[40+rbx]\n\tsbb\trax,0\n\n\tmov\tr12,QWORD PTR[rsp]\n\n\tnot\trax\n\n\ttest\tr14,r14\n\tcmovz\tr13,rdi\n\n\ttest\tr12,r12\n\tcmovnz\trax,rdi\n\n\tand\tr13,1\n\tand\trax,2\n\tor\trax,r13\n\n\tmov\tr15,QWORD PTR[8+rsp]\n\n\tmov\tr14,QWORD PTR[16+rsp]\n\n\tmov\tr13,QWORD PTR[24+rsp]\n\n\tmov\tr12,QWORD PTR[32+rsp]\n\n\tmov\trbx,QWORD PTR[40+rsp]\n\n\tmov\trbp,QWORD PTR[48+rsp]\n\n\tlea\trsp,QWORD PTR[56+rsp]\n\n$L$SEH_epilogue_sgn0x_pty_mont_384x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sgn0x_pty_mont_384x::\nsgn0x_pty_mont_384x\tENDP\nPUBLIC\tmulx_mont_384\n\n\nALIGN\t32\nmulx_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_mulx_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\nmul_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tlea\trsp,QWORD PTR[((-24))+rsp]\n\n$L$SEH_body_mulx_mont_384::\n\n\n\tmov\trbx,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rdx]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\tlea\trsi,QWORD 
PTR[((-128))+rsi]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\tmov\tQWORD PTR[rsp],r8\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_384\n\n\tmov\tr15,QWORD PTR[24+rsp]\n\n\tmov\tr14,QWORD PTR[32+rsp]\n\n\tmov\tr13,QWORD PTR[40+rsp]\n\n\tmov\tr12,QWORD PTR[48+rsp]\n\n\tmov\trbx,QWORD PTR[56+rsp]\n\n\tmov\trbp,QWORD PTR[64+rsp]\n\n\tlea\trsp,QWORD PTR[72+rsp]\n\n$L$SEH_epilogue_mulx_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_mulx_mont_384::\nmulx_mont_384\tENDP\n\nALIGN\t32\n__mulx_mont_384\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\n\tmulx\tr10,r14,r15\n\tmulx\tr11,r15,rax\n\tadd\tr9,r14\n\tmulx\tr12,rax,r12\n\tadc\tr10,r15\n\tmulx\tr13,rdi,rdi\n\tadc\tr11,rax\n\tmulx\tr14,rbp,rbp\n\tmov\trdx,QWORD PTR[8+rbx]\n\tadc\tr12,rdi\n\tadc\tr13,rbp\n\tadc\tr14,0\n\txor\tr15,r15\n\n\tmov\tQWORD PTR[16+rsp],r8\n\timul\tr8,QWORD PTR[8+rsp]\n\n\n\txor\trax,rax\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr9,rdi\n\tadcx\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr10,rdi\n\tadcx\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r8\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\tadox\tr15,rax\n\tadox\trax,rax\n\n\n\txor\tr8,r8\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trdi,QWORD PTR[16+rsp]\n\tadox\tr9,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr9,rdi\n\tadox\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr10,rdi\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD 
PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[16+rbx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\tadcx\tr14,r8\n\tadox\tr15,r8\n\tadcx\tr15,r8\n\tadox\trax,r8\n\tadcx\trax,r8\n\tmov\tQWORD PTR[16+rsp],r9\n\timul\tr9,QWORD PTR[8+rsp]\n\n\n\txor\tr8,r8\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr10,rdi\n\tadcx\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r9\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\tadox\trax,r8\n\tadox\tr8,r8\n\n\n\txor\tr9,r9\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trdi,QWORD PTR[16+rsp]\n\tadox\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr10,rdi\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[24+rbx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\tadcx\tr15,r9\n\tadox\trax,r9\n\tadcx\trax,r9\n\tadox\tr8,r9\n\tadcx\tr8,r9\n\tmov\tQWORD PTR[16+rsp],r10\n\timul\tr10,QWORD PTR[8+rsp]\n\n\n\txor\tr9,r9\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD 
PTR[((40+128))+rsi]\n\tmov\trdx,r10\n\tadox\trax,rdi\n\tadcx\tr8,rbp\n\tadox\tr8,r9\n\tadox\tr9,r9\n\n\n\txor\tr10,r10\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trdi,QWORD PTR[16+rsp]\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[32+rbx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\tadcx\trax,r10\n\tadox\tr8,r10\n\tadcx\tr8,r10\n\tadox\tr9,r10\n\tadcx\tr9,r10\n\tmov\tQWORD PTR[16+rsp],r11\n\timul\tr11,QWORD PTR[8+rsp]\n\n\n\txor\tr10,r10\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\trax,rdi\n\tadcx\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r11\n\tadox\tr8,rdi\n\tadcx\tr9,rbp\n\tadox\tr9,r10\n\tadox\tr10,r10\n\n\n\txor\tr11,r11\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trdi,QWORD PTR[16+rsp]\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[40+rbx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\tadcx\tr8,r11\n\tadox\tr9,r11\n\tadcx\tr9,r11\n\tadox\tr10,r11\n\tadcx\tr10,r11\n\tmov\tQWORD PTR[16+rsp],r12\n\timul\tr12,QWORD 
PTR[8+rsp]\n\n\n\txor\tr11,r11\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\trax,rdi\n\tadcx\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr8,rdi\n\tadcx\tr9,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r12\n\tadox\tr9,rdi\n\tadcx\tr10,rbp\n\tadox\tr10,r11\n\tadox\tr11,r11\n\n\n\txor\tr12,r12\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trdi,QWORD PTR[16+rsp]\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,r13\n\tadcx\tr8,rdi\n\tadox\tr9,rbp\n\tadcx\tr9,r12\n\tadox\tr10,r12\n\tadcx\tr10,r12\n\tadox\tr11,r12\n\tadcx\tr11,r12\n\timul\trdx,QWORD PTR[8+rsp]\n\tmov\trbx,QWORD PTR[24+rsp]\n\n\n\txor\tr12,r12\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\tmov\tr13,r15\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr8,rdi\n\tadox\tr9,rbp\n\tmov\trsi,rax\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tadcx\tr9,rdi\n\tadox\tr10,rbp\n\tmov\trdx,r14\n\tadcx\tr10,r12\n\tadox\tr11,r12\n\tlea\trcx,QWORD PTR[128+rcx]\n\tmov\tr12,r8\n\tadc\tr11,0\n\n\n\n\n\tsub\tr14,QWORD PTR[rcx]\n\tsbb\tr15,QWORD PTR[8+rcx]\n\tmov\trdi,r9\n\tsbb\trax,QWORD PTR[16+rcx]\n\tsbb\tr8,QWORD PTR[24+rcx]\n\tsbb\tr9,QWORD 
PTR[32+rcx]\n\tmov\trbp,r10\n\tsbb\tr10,QWORD PTR[40+rcx]\n\tsbb\tr11,0\n\n\tcmovnc\trdx,r14\n\tcmovc\tr15,r13\n\tcmovc\trax,rsi\n\tcmovnc\tr12,r8\n\tmov\tQWORD PTR[rbx],rdx\n\tcmovnc\trdi,r9\n\tmov\tQWORD PTR[8+rbx],r15\n\tcmovnc\trbp,r10\n\tmov\tQWORD PTR[16+rbx],rax\n\tmov\tQWORD PTR[24+rbx],r12\n\tmov\tQWORD PTR[32+rbx],rdi\n\tmov\tQWORD PTR[40+rbx],rbp\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trsi\n\tlfence\n\tjmp\trsi\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n__mulx_mont_384\tENDP\nPUBLIC\tsqrx_mont_384\n\n\nALIGN\t32\nsqrx_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nsqr_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tlea\trsp,QWORD PTR[((-24))+rsp]\n\n$L$SEH_body_sqrx_mont_384::\n\n\n\tmov\tr8,rcx\n\tlea\trcx,QWORD PTR[((-128))+rdx]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\n\tlea\trbx,QWORD PTR[rsi]\n\tmov\tQWORD PTR[rsp],r8\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\n\tmulx\tr9,r8,rdx\n\tcall\t__mulx_mont_384\n\n\tmov\tr15,QWORD PTR[24+rsp]\n\n\tmov\tr14,QWORD PTR[32+rsp]\n\n\tmov\tr13,QWORD PTR[40+rsp]\n\n\tmov\tr12,QWORD PTR[48+rsp]\n\n\tmov\trbx,QWORD PTR[56+rsp]\n\n\tmov\trbp,QWORD PTR[64+rsp]\n\n\tlea\trsp,QWORD PTR[72+rsp]\n\n$L$SEH_epilogue_sqrx_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_mont_384::\nsqrx_mont_384\tENDP\n\nPUBLIC\tsqrx_n_mul_mont_384\n\n\nALIGN\t32\nsqrx_n_mul_mont_384\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD 
PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_n_mul_mont_384::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\n\tmov\tr9,QWORD PTR[48+rsp]\nsqr_n_mul_mont_384$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tlea\trsp,QWORD PTR[((-40))+rsp]\n\n$L$SEH_body_sqrx_n_mul_mont_384::\n\n\n\tmov\tr10,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\trbx,rsi\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tQWORD PTR[24+rsp],r9\n\tmovq\txmm2,QWORD PTR[r9]\n\n$L$oop_sqrx_384::\n\tmovd\txmm1,r10d\n\tlea\trsi,QWORD PTR[((-128))+rbx]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr9,r8,rdx\n\tcall\t__mulx_mont_384\n\n\tmovd\tr10d,xmm1\n\tdec\tr10d\n\tjnz\t$L$oop_sqrx_384\n\n\tmov\tr14,rdx\nDB\t102,72,15,126,210\n\tlea\trsi,QWORD PTR[((-128))+rbx]\n\tmov\trbx,QWORD PTR[24+rsp]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_384\n\n\tmov\tr15,QWORD PTR[40+rsp]\n\n\tmov\tr14,QWORD PTR[48+rsp]\n\n\tmov\tr13,QWORD PTR[56+rsp]\n\n\tmov\tr12,QWORD PTR[64+rsp]\n\n\tmov\trbx,QWORD PTR[72+rsp]\n\n\tmov\trbp,QWORD PTR[80+rsp]\n\n\tlea\trsp,QWORD PTR[88+rsp]\n\n$L$SEH_epilogue_sqrx_n_mul_mont_384::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_n_mul_mont_384::\nsqrx_n_mul_mont_384\tENDP\n\nPUBLIC\tsqrx_n_mul_mont_383\n\n\nALIGN\t32\nsqrx_n_mul_mont_383\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD 
PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_n_mul_mont_383::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\n\tmov\tr8,QWORD PTR[40+rsp]\n\tmov\tr9,QWORD PTR[48+rsp]\nsqr_n_mul_mont_383$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tlea\trsp,QWORD PTR[((-40))+rsp]\n\n$L$SEH_body_sqrx_n_mul_mont_383::\n\n\n\tmov\tr10,rdx\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\trdx,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\trbx,rsi\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\n\tmov\tQWORD PTR[rsp],r8\n\tmov\tQWORD PTR[24+rsp],r9\n\tmovq\txmm2,QWORD PTR[r9]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n$L$oop_sqrx_383::\n\tmovd\txmm1,r10d\n\tlea\trsi,QWORD PTR[((-128))+rbx]\n\n\tmulx\tr9,r8,rdx\n\tcall\t__mulx_mont_383_nonred\n\n\tmovd\tr10d,xmm1\n\tdec\tr10d\n\tjnz\t$L$oop_sqrx_383\n\n\tmov\tr14,rdx\nDB\t102,72,15,126,210\n\tlea\trsi,QWORD PTR[((-128))+rbx]\n\tmov\trbx,QWORD PTR[24+rsp]\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_384\n\n\tmov\tr15,QWORD PTR[40+rsp]\n\n\tmov\tr14,QWORD PTR[48+rsp]\n\n\tmov\tr13,QWORD PTR[56+rsp]\n\n\tmov\tr12,QWORD PTR[64+rsp]\n\n\tmov\trbx,QWORD PTR[72+rsp]\n\n\tmov\trbp,QWORD PTR[80+rsp]\n\n\tlea\trsp,QWORD PTR[88+rsp]\n\n$L$SEH_epilogue_sqrx_n_mul_mont_383::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_n_mul_mont_383::\nsqrx_n_mul_mont_383\tENDP\n\nALIGN\t32\n__mulx_mont_383_nonred\tPROC PRIVATE\n\tDB\t243,15,30,250\n\n\n\tmulx\tr10,r14,r15\n\tmulx\tr11,r15,rax\n\tadd\tr9,r14\n\tmulx\tr12,rax,r12\n\tadc\tr10,r15\n\tmulx\tr13,rdi,rdi\n\tadc\tr11,rax\n\tmulx\tr14,rbp,rbp\n\tmov\trdx,QWORD PTR[8+rbx]\n\tadc\tr12,rdi\n\tadc\tr13,rbp\n\tadc\tr14,0\n\tmov\trax,r8\n\timul\tr8,QWORD 
PTR[8+rsp]\n\n\n\txor\tr15,r15\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr9,rdi\n\tadcx\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr10,rdi\n\tadcx\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r8\n\tadox\tr14,rdi\n\tadcx\trbp,r15\n\tadox\tr15,rbp\n\n\n\txor\tr8,r8\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\trax,rdi\n\tadox\tr9,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr9,rdi\n\tadox\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr10,rdi\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[16+rbx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\tadcx\tr14,rax\n\tadox\tr15,rax\n\tadcx\tr15,rax\n\tmov\tr8,r9\n\timul\tr9,QWORD PTR[8+rsp]\n\n\n\txor\trax,rax\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr10,rdi\n\tadcx\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r9\n\tadox\tr15,rdi\n\tadcx\trbp,rax\n\tadox\trax,rbp\n\n\n\txor\tr9,r9\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr8,rdi\n\tadox\tr10,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr10,rdi\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD 
PTR[((24+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[24+rbx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\tadcx\tr15,r8\n\tadox\trax,r8\n\tadcx\trax,r8\n\tmov\tr9,r10\n\timul\tr10,QWORD PTR[8+rsp]\n\n\n\txor\tr8,r8\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr11,rdi\n\tadcx\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r10\n\tadox\trax,rdi\n\tadcx\trbp,r8\n\tadox\tr8,rbp\n\n\n\txor\tr10,r10\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr9,rdi\n\tadox\tr11,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[32+rbx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\tadcx\trax,r9\n\tadox\tr8,r9\n\tadcx\tr8,r9\n\tmov\tr10,r11\n\timul\tr11,QWORD PTR[8+rsp]\n\n\n\txor\tr9,r9\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr12,rdi\n\tadcx\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\trax,rdi\n\tadcx\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD 
PTR[((40+128))+rsi]\n\tmov\trdx,r11\n\tadox\tr8,rdi\n\tadcx\trbp,r9\n\tadox\tr9,rbp\n\n\n\txor\tr11,r11\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr10,rdi\n\tadox\tr12,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr12,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,QWORD PTR[40+rbx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\tadcx\tr8,r10\n\tadox\tr9,r10\n\tadcx\tr9,r10\n\tmov\tr11,r12\n\timul\tr12,QWORD PTR[8+rsp]\n\n\n\txor\tr10,r10\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rsi]\n\tadox\tr13,rdi\n\tadcx\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rsi]\n\tadox\tr14,rdi\n\tadcx\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rsi]\n\tadox\tr15,rdi\n\tadcx\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rsi]\n\tadox\trax,rdi\n\tadcx\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rsi]\n\tadox\tr8,rdi\n\tadcx\tr9,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rsi]\n\tmov\trdx,r12\n\tadox\tr9,rdi\n\tadcx\trbp,r10\n\tadox\tr10,rbp\n\n\n\txor\tr12,r12\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr11,rdi\n\tadox\tr13,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((8+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,r13\n\tadcx\tr8,rdi\n\tadox\tr9,rbp\n\tadcx\tr9,r11\n\tadox\tr10,r11\n\tadcx\tr10,r11\n\timul\trdx,QWORD PTR[8+rsp]\n\tmov\trbx,QWORD PTR[24+rsp]\n\n\n\txor\tr12,r12\n\tmulx\trbp,rdi,QWORD PTR[((0+128))+rcx]\n\tadcx\tr13,rdi\n\tadox\tr14,rbp\n\n\tmulx\trbp,rdi,QWORD 
PTR[((8+128))+rcx]\n\tadcx\tr14,rdi\n\tadox\tr15,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((16+128))+rcx]\n\tadcx\tr15,rdi\n\tadox\trax,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((24+128))+rcx]\n\tadcx\trax,rdi\n\tadox\tr8,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((32+128))+rcx]\n\tadcx\tr8,rdi\n\tadox\tr9,rbp\n\n\tmulx\trbp,rdi,QWORD PTR[((40+128))+rcx]\n\tmov\trdx,r14\n\tadcx\tr9,rdi\n\tadox\tr10,rbp\n\tadc\tr10,0\n\tmov\tr12,r8\n\n\tmov\tQWORD PTR[rbx],r14\n\tmov\tQWORD PTR[8+rbx],r15\n\tmov\tQWORD PTR[16+rbx],rax\n\tmov\trdi,r9\n\tmov\tQWORD PTR[24+rbx],r8\n\tmov\tQWORD PTR[32+rbx],r9\n\tmov\tQWORD PTR[40+rbx],r10\n\tmov\trbp,r10\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trsi\n\tlfence\n\tjmp\trsi\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n__mulx_mont_383_nonred\tENDP\nPUBLIC\tsqrx_mont_382x\n\n\nALIGN\t32\nsqrx_mont_382x\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_sqrx_mont_382x::\n\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n\tmov\trcx,r9\nsqr_mont_382x$1::\n\tpush\trbp\n\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tsub\trsp,136\n\n$L$SEH_body_sqrx_mont_382x::\n\n\n\tmov\tQWORD PTR[rsp],rcx\n\tmov\trcx,rdx\n\tmov\tQWORD PTR[16+rsp],rdi\n\tmov\tQWORD PTR[24+rsp],rsi\n\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rsi]\n\tmov\tr9,QWORD PTR[8+rsi]\n\tmov\tr10,QWORD PTR[16+rsi]\n\tmov\tr11,QWORD PTR[24+rsi]\n\tmov\tr12,QWORD PTR[32+rsi]\n\tmov\tr13,QWORD PTR[40+rsi]\n\n\tmov\tr14,r8\n\tadd\tr8,QWORD PTR[48+rsi]\n\tmov\tr15,r9\n\tadc\tr9,QWORD PTR[56+rsi]\n\tmov\trax,r10\n\tadc\tr10,QWORD PTR[64+rsi]\n\tmov\trdx,r11\n\tadc\tr11,QWORD PTR[72+rsi]\n\tmov\trbx,r12\n\tadc\tr12,QWORD PTR[80+rsi]\n\tmov\trbp,r13\n\tadc\tr13,QWORD PTR[88+rsi]\n\n\tsub\tr14,QWORD PTR[48+rsi]\n\tsbb\tr15,QWORD PTR[56+rsi]\n\tsbb\trax,QWORD PTR[64+rsi]\n\tsbb\trdx,QWORD PTR[72+rsi]\n\tsbb\trbx,QWORD PTR[80+rsi]\n\tsbb\trbp,QWORD 
PTR[88+rsi]\n\tsbb\trdi,rdi\n\n\tmov\tQWORD PTR[((32+0))+rsp],r8\n\tmov\tQWORD PTR[((32+8))+rsp],r9\n\tmov\tQWORD PTR[((32+16))+rsp],r10\n\tmov\tQWORD PTR[((32+24))+rsp],r11\n\tmov\tQWORD PTR[((32+32))+rsp],r12\n\tmov\tQWORD PTR[((32+40))+rsp],r13\n\n\tmov\tQWORD PTR[((32+48))+rsp],r14\n\tmov\tQWORD PTR[((32+56))+rsp],r15\n\tmov\tQWORD PTR[((32+64))+rsp],rax\n\tmov\tQWORD PTR[((32+72))+rsp],rdx\n\tmov\tQWORD PTR[((32+80))+rsp],rbx\n\tmov\tQWORD PTR[((32+88))+rsp],rbp\n\tmov\tQWORD PTR[((32+96))+rsp],rdi\n\n\n\n\tlea\trbx,QWORD PTR[48+rsi]\n\n\tmov\trdx,QWORD PTR[48+rsi]\n\tmov\tr14,QWORD PTR[rsi]\n\tmov\tr15,QWORD PTR[8+rsi]\n\tmov\trax,QWORD PTR[16+rsi]\n\tmov\tr12,QWORD PTR[24+rsi]\n\tmov\trdi,QWORD PTR[32+rsi]\n\tmov\trbp,QWORD PTR[40+rsi]\n\tlea\trsi,QWORD PTR[((-128))+rsi]\n\tlea\trcx,QWORD PTR[((-128))+rcx]\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_383_nonred\n\tadd\trdx,rdx\n\tadc\tr15,r15\n\tadc\trax,rax\n\tadc\tr12,r12\n\tadc\trdi,rdi\n\tadc\trbp,rbp\n\n\tmov\tQWORD PTR[48+rbx],rdx\n\tmov\tQWORD PTR[56+rbx],r15\n\tmov\tQWORD PTR[64+rbx],rax\n\tmov\tQWORD PTR[72+rbx],r12\n\tmov\tQWORD PTR[80+rbx],rdi\n\tmov\tQWORD PTR[88+rbx],rbp\n\n\tlea\trsi,QWORD PTR[((32-128))+rsp]\n\tlea\trbx,QWORD PTR[((32+48))+rsp]\n\n\tmov\trdx,QWORD PTR[((32+48))+rsp]\n\tmov\tr14,QWORD PTR[((32+0))+rsp]\n\tmov\tr15,QWORD PTR[((32+8))+rsp]\n\tmov\trax,QWORD PTR[((32+16))+rsp]\n\tmov\tr12,QWORD PTR[((32+24))+rsp]\n\tmov\trdi,QWORD PTR[((32+32))+rsp]\n\tmov\trbp,QWORD PTR[((32+40))+rsp]\n\n\n\n\tmulx\tr9,r8,r14\n\tcall\t__mulx_mont_383_nonred\n\tmov\tr14,QWORD PTR[((32+96))+rsp]\n\tlea\trcx,QWORD PTR[128+rcx]\n\tmov\tr8,QWORD PTR[((32+0))+rsp]\n\tand\tr8,r14\n\tmov\tr9,QWORD PTR[((32+8))+rsp]\n\tand\tr9,r14\n\tmov\tr10,QWORD PTR[((32+16))+rsp]\n\tand\tr10,r14\n\tmov\tr11,QWORD PTR[((32+24))+rsp]\n\tand\tr11,r14\n\tmov\tr13,QWORD PTR[((32+32))+rsp]\n\tand\tr13,r14\n\tand\tr14,QWORD PTR[((32+40))+rsp]\n\n\tsub\trdx,r8\n\tmov\tr8,QWORD PTR[rcx]\n\tsbb\tr15,r9\n\tmov\tr9,QWORD 
PTR[8+rcx]\n\tsbb\trax,r10\n\tmov\tr10,QWORD PTR[16+rcx]\n\tsbb\tr12,r11\n\tmov\tr11,QWORD PTR[24+rcx]\n\tsbb\trdi,r13\n\tmov\tr13,QWORD PTR[32+rcx]\n\tsbb\trbp,r14\n\tsbb\tr14,r14\n\n\tand\tr8,r14\n\tand\tr9,r14\n\tand\tr10,r14\n\tand\tr11,r14\n\tand\tr13,r14\n\tand\tr14,QWORD PTR[40+rcx]\n\n\tadd\trdx,r8\n\tadc\tr15,r9\n\tadc\trax,r10\n\tadc\tr12,r11\n\tadc\trdi,r13\n\tadc\trbp,r14\n\n\tmov\tQWORD PTR[rbx],rdx\n\tmov\tQWORD PTR[8+rbx],r15\n\tmov\tQWORD PTR[16+rbx],rax\n\tmov\tQWORD PTR[24+rbx],r12\n\tmov\tQWORD PTR[32+rbx],rdi\n\tmov\tQWORD PTR[40+rbx],rbp\n\tlea\tr8,QWORD PTR[136+rsp]\n\tmov\tr15,QWORD PTR[r8]\n\n\tmov\tr14,QWORD PTR[8+r8]\n\n\tmov\tr13,QWORD PTR[16+r8]\n\n\tmov\tr12,QWORD PTR[24+r8]\n\n\tmov\trbx,QWORD PTR[32+r8]\n\n\tmov\trbp,QWORD PTR[40+r8]\n\n\tlea\trsp,QWORD PTR[48+r8]\n\n$L$SEH_epilogue_sqrx_mont_382x::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_sqrx_mont_382x::\nsqrx_mont_382x\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_mulx_mont_384x\n\tDD\timagerel $L$SEH_body_mulx_mont_384x\n\tDD\timagerel $L$SEH_info_mulx_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_mulx_mont_384x\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_384x\n\tDD\timagerel $L$SEH_info_mulx_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_384x\n\tDD\timagerel $L$SEH_end_mulx_mont_384x\n\tDD\timagerel $L$SEH_info_mulx_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_mont_384x\n\tDD\timagerel $L$SEH_body_sqrx_mont_384x\n\tDD\timagerel $L$SEH_info_sqrx_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_mont_384x\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_384x\n\tDD\timagerel $L$SEH_info_sqrx_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_384x\n\tDD\timagerel $L$SEH_end_sqrx_mont_384x\n\tDD\timagerel 
$L$SEH_info_sqrx_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mulx_382x\n\tDD\timagerel $L$SEH_body_mulx_382x\n\tDD\timagerel $L$SEH_info_mulx_382x_prologue\n\n\tDD\timagerel $L$SEH_body_mulx_382x\n\tDD\timagerel $L$SEH_epilogue_mulx_382x\n\tDD\timagerel $L$SEH_info_mulx_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_mulx_382x\n\tDD\timagerel $L$SEH_end_mulx_382x\n\tDD\timagerel $L$SEH_info_mulx_382x_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_382x\n\tDD\timagerel $L$SEH_body_sqrx_382x\n\tDD\timagerel $L$SEH_info_sqrx_382x_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_382x\n\tDD\timagerel $L$SEH_epilogue_sqrx_382x\n\tDD\timagerel $L$SEH_info_sqrx_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_382x\n\tDD\timagerel $L$SEH_end_sqrx_382x\n\tDD\timagerel $L$SEH_info_sqrx_382x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mulx_384\n\tDD\timagerel $L$SEH_body_mulx_384\n\tDD\timagerel $L$SEH_info_mulx_384_prologue\n\n\tDD\timagerel $L$SEH_body_mulx_384\n\tDD\timagerel $L$SEH_epilogue_mulx_384\n\tDD\timagerel $L$SEH_info_mulx_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mulx_384\n\tDD\timagerel $L$SEH_end_mulx_384\n\tDD\timagerel $L$SEH_info_mulx_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_384\n\tDD\timagerel $L$SEH_body_sqrx_384\n\tDD\timagerel $L$SEH_info_sqrx_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_384\n\tDD\timagerel $L$SEH_epilogue_sqrx_384\n\tDD\timagerel $L$SEH_info_sqrx_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_384\n\tDD\timagerel $L$SEH_end_sqrx_384\n\tDD\timagerel $L$SEH_info_sqrx_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_redcx_mont_384\n\tDD\timagerel $L$SEH_body_redcx_mont_384\n\tDD\timagerel $L$SEH_info_redcx_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_redcx_mont_384\n\tDD\timagerel $L$SEH_epilogue_redcx_mont_384\n\tDD\timagerel $L$SEH_info_redcx_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_redcx_mont_384\n\tDD\timagerel $L$SEH_end_redcx_mont_384\n\tDD\timagerel $L$SEH_info_redcx_mont_384_epilogue\n\n\tDD\timagerel 
$L$SEH_begin_fromx_mont_384\n\tDD\timagerel $L$SEH_body_fromx_mont_384\n\tDD\timagerel $L$SEH_info_fromx_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_fromx_mont_384\n\tDD\timagerel $L$SEH_epilogue_fromx_mont_384\n\tDD\timagerel $L$SEH_info_fromx_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_fromx_mont_384\n\tDD\timagerel $L$SEH_end_fromx_mont_384\n\tDD\timagerel $L$SEH_info_fromx_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_body_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_epilogue_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_end_sgn0x_pty_mont_384\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_body_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384x_prologue\n\n\tDD\timagerel $L$SEH_body_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_epilogue_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384x_body\n\n\tDD\timagerel $L$SEH_epilogue_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_end_sgn0x_pty_mont_384x\n\tDD\timagerel $L$SEH_info_sgn0x_pty_mont_384x_epilogue\n\n\tDD\timagerel $L$SEH_begin_mulx_mont_384\n\tDD\timagerel $L$SEH_body_mulx_mont_384\n\tDD\timagerel $L$SEH_info_mulx_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_mulx_mont_384\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_384\n\tDD\timagerel $L$SEH_info_mulx_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_mulx_mont_384\n\tDD\timagerel $L$SEH_end_mulx_mont_384\n\tDD\timagerel $L$SEH_info_mulx_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_mont_384\n\tDD\timagerel $L$SEH_body_sqrx_mont_384\n\tDD\timagerel $L$SEH_info_sqrx_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_mont_384\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_384\n\tDD\timagerel 
$L$SEH_info_sqrx_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_384\n\tDD\timagerel $L$SEH_end_sqrx_mont_384\n\tDD\timagerel $L$SEH_info_sqrx_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_body_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_384_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_epilogue_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_384_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_end_sqrx_n_mul_mont_384\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_384_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_body_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_383_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_epilogue_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_383_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_end_sqrx_n_mul_mont_383\n\tDD\timagerel $L$SEH_info_sqrx_n_mul_mont_383_epilogue\n\n\tDD\timagerel $L$SEH_begin_sqrx_mont_382x\n\tDD\timagerel $L$SEH_body_sqrx_mont_382x\n\tDD\timagerel $L$SEH_info_sqrx_mont_382x_prologue\n\n\tDD\timagerel $L$SEH_body_sqrx_mont_382x\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_382x\n\tDD\timagerel $L$SEH_info_sqrx_mont_382x_body\n\n\tDD\timagerel $L$SEH_epilogue_sqrx_mont_382x\n\tDD\timagerel $L$SEH_end_sqrx_mont_382x\n\tDD\timagerel $L$SEH_info_sqrx_mont_382x_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY 
ALIGN(8)\nALIGN\t8\n$L$SEH_info_mulx_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mulx_mont_384x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,029h,000h\nDB\t000h,0e4h,02ah,000h\nDB\t000h,0d4h,02bh,000h\nDB\t000h,0c4h,02ch,000h\nDB\t000h,034h,02dh,000h\nDB\t000h,054h,02eh,000h\nDB\t000h,074h,030h,000h\nDB\t000h,064h,031h,000h\nDB\t000h,001h,02fh,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mulx_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_mont_384x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mulx_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mulx_382x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mulx_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_382x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,00
0h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mulx_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mulx_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,000h,000h\nDB\t000h,0e4h,001h,000h\nDB\t000h,0d4h,002h,000h\nDB\t000h,0c4h,003h,000h\nDB\t000h,034h,004h,000h\nDB\t000h,054h,005h,000h\nDB\t000h,074h,007h,000h\nDB\t000h,064h,008h,000h\nDB\t000h,052h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mulx_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_redcx_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_redcx_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_redcx_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_fromx_mont_384_prologue::\nDB\
t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_fromx_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_fromx_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0x_pty_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sgn0x_pty_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0x_pty_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sgn0x_pty_mont_384x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sgn0x_pty_mont_384x_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,001h,000h\nDB\t000h,0e4h,002h,000h\nDB\t000h,0d4h,003h,000h\nDB\t000h,0c4h,004h,000h\nDB\t000h,034h,005h,000h\nDB\t000h,054h,006h,000h\nDB\t000h,074h,008h,000h\nDB\t000h,064h,009h,000h\nDB\t000h,062h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sgn0x_pty_mont_384x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_mulx_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_mulx_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,003h,000h\nDB\t000h,0e4h,004h,000h\nDB\t000h,0d4h,005h,000h\nDB\t000h,0c4h,006h,000h\nDB\t000h,034h,007h,000h\nDB\t000h
,054h,008h,000h\nDB\t000h,074h,00ah,000h\nDB\t000h,064h,00bh,000h\nDB\t000h,082h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_mulx_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,003h,000h\nDB\t000h,0e4h,004h,000h\nDB\t000h,0d4h,005h,000h\nDB\t000h,0c4h,006h,000h\nDB\t000h,034h,007h,000h\nDB\t000h,054h,008h,000h\nDB\t000h,074h,00ah,000h\nDB\t000h,064h,00bh,000h\nDB\t000h,082h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_n_mul_mont_384_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_n_mul_mont_384_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,005h,000h\nDB\t000h,0e4h,006h,000h\nDB\t000h,0d4h,007h,000h\nDB\t000h,0c4h,008h,000h\nDB\t000h,034h,009h,000h\nDB\t000h,054h,00ah,000h\nDB\t000h,074h,00ch,000h\nDB\t000h,064h,00dh,000h\nDB\t000h,0a2h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_n_mul_mont_384_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_sqrx_n_mul_mont_383_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_n_mul_mont_383_body::\nDB\t1,0,17,0\nDB\t000h,0f4h,005h,000h\nDB\t000h,0e4h,006h,000h\nDB\t000h,0d4h,007h,000h\nDB\t000h,0c4h,008h,000h\nDB\t000h,034h,009h,000h\nDB\t000h,054h,00ah,000h\nDB\t000h,074h,00ch,000h\nDB\t000h,064h,00dh,000h\nDB\t000h,0a2h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_n_mul_mont_383_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,
000h,000h\n\n$L$SEH_info_sqrx_mont_382x_prologue::\nDB\t1,0,5,00bh\nDB\t0,074h,1,0\nDB\t0,064h,2,0\nDB\t0,0b3h\nDB\t0,0\n\tDD\t0,0\n$L$SEH_info_sqrx_mont_382x_body::\nDB\t1,0,18,0\nDB\t000h,0f4h,011h,000h\nDB\t000h,0e4h,012h,000h\nDB\t000h,0d4h,013h,000h\nDB\t000h,0c4h,014h,000h\nDB\t000h,034h,015h,000h\nDB\t000h,054h,016h,000h\nDB\t000h,074h,018h,000h\nDB\t000h,064h,019h,000h\nDB\t000h,001h,017h,000h\nDB\t000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_sqrx_mont_382x_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build/win64/sha256-armv8.asm",
    "content": " GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA 64/8\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\tCOMMON\t|__blst_platform_cap|,4\n\tAREA\t|.text|,CODE,ALIGN=8,ARM64\n\n\tALIGN\t64\n\n|$LK256|\n\tDCDU\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n\tDCDU\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n\tDCDU\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n\tDCDU\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n\tDCDU\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n\tDCDU\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n\tDCDU\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n\tDCDU\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n\tDCDU\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n\tDCDU\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n\tDCDU\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n\tDCDU\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n\tDCDU\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n\tDCDU\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n\tDCDU\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n\tDCDU\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\tDCDU\t0\n\n\tDCB\t\"SHA256 block transform for ARMv8, CRYPTOGAMS by @dot-asm\",0\n\tALIGN\t4\n\tALIGN\t4\n\n\n\tEXPORT\t|blst_sha256_block_armv8|[FUNC]\n\tALIGN\t64\n|blst_sha256_block_armv8| 
PROC\n\thint\t#34\n|$Lv8_entry|\n\tstp\tx29,x30,[sp,#-2*__SIZEOF_POINTER__]!\n\tadd\tx29,sp,#0\n\n\tld1\t{v0.4s,v1.4s},[x0]\n\tadr\tx3,|$LK256|\n\n|$Loop_hw|\n\tld1\t{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64\n\tsub\tx2,x2,#1\n\tld1\t{v16.4s},[x3],#16\n\trev32\tv4.16b,v4.16b\n\trev32\tv5.16b,v5.16b\n\trev32\tv6.16b,v6.16b\n\trev32\tv7.16b,v7.16b\n\torr\tv18.16b,v0.16b,v0.16b\n\torr\tv19.16b,v1.16b,v1.16b\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\tDCDU\t0x5e2828a4\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e0760c4\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\tDCDU\t0x5e2828c5\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\tDCDU\t0x5e0460e5\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n\tDCDU\t0x5e2828e6\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e056086\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n\tDCDU\t0x5e282887\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\tDCDU\t0x5e0660a7\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\tDCDU\t0x5e2828a4\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e0760c4\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\tDCDU\t0x5e2828c5\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\tDCDU\t0x5e0460e5\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n\tDCDU\t0x5e2828e6\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e056086\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n\tDCDU\t0x5e282887\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\tDCDU\t0x5e0660a7\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\tDCDU\t0x5e2828a4\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e0760c4\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\tDCDU\t0x5e2828c5\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCD
U\t0x5e115041\n\tDCDU\t0x5e0460e5\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v6.4s\n\tDCDU\t0x5e2828e6\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\tDCDU\t0x5e056086\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v7.4s\n\tDCDU\t0x5e282887\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\tDCDU\t0x5e0660a7\n\tld1\t{v17.4s},[x3],#16\n\tadd\tv16.4s,v16.4s,v4.4s\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\n\tld1\t{v16.4s},[x3],#16\n\tadd\tv17.4s,v17.4s,v5.4s\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\n\tld1\t{v17.4s},[x3]\n\tadd\tv16.4s,v16.4s,v6.4s\n\tsub\tx3,x3,#64*4-16\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e104020\n\tDCDU\t0x5e105041\n\n\tadd\tv17.4s,v17.4s,v7.4s\n\torr\tv2.16b,v0.16b,v0.16b\n\tDCDU\t0x5e114020\n\tDCDU\t0x5e115041\n\n\tadd\tv0.4s,v0.4s,v18.4s\n\tadd\tv1.4s,v1.4s,v19.4s\n\n\tcbnz\tx2,|$Loop_hw|\n\n\tst1\t{v0.4s,v1.4s},[x0]\n\n\tldr\tx29,[sp],#2*__SIZEOF_POINTER__\n\tret\n\tENDP\n\n\n\tEXPORT\t|blst_sha256_block_data_order|[FUNC]\n\tALIGN\t16\n|blst_sha256_block_data_order| PROC\n\thint\t#34\n\tadrp\tx16,__blst_platform_cap\n\tldr\tw16,[x16,__blst_platform_cap]\n\ttst\tw16,#1\n\tbne\t|$Lv8_entry|\n\n\tstp\tx29, x30, [sp, #-2*__SIZEOF_POINTER__]!\n\tmov\tx29, sp\n\tsub\tsp,sp,#16*4\n\n\tadr\tx16,|$LK256|\n\tadd\tx2,x1,x2,lsl#6\n\n\tld1\t{v0.16b},[x1], #16\n\tld1\t{v1.16b},[x1], #16\n\tld1\t{v2.16b},[x1], #16\n\tld1\t{v3.16b},[x1], #16\n\tld1\t{v4.4s},[x16], #16\n\tld1\t{v5.4s},[x16], #16\n\tld1\t{v6.4s},[x16], #16\n\tld1\t{v7.4s},[x16], #16\n\trev32\tv0.16b,v0.16b\n\trev32\tv1.16b,v1.16b\n\trev32\tv2.16b,v2.16b\n\trev32\tv3.16b,v3.16b\n\tmov\tx17,sp\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tv5.4s,v5.4s,v1.4s\n\tadd\tv6.4s,v6.4s,v2.4s\n\tst1\t{v4.4s,v5.4s},[x17], 
#32\n\tadd\tv7.4s,v7.4s,v3.4s\n\tst1\t{v6.4s,v7.4s},[x17]\n\tsub\tx17,x17,#32\n\n\tldp\tw3,w4,[x0]\n\tldp\tw5,w6,[x0,#8]\n\tldp\tw7,w8,[x0,#16]\n\tldp\tw9,w10,[x0,#24]\n\tldr\tw12,[sp,#0]\n\tmov\tw13,wzr\n\teor\tw14,w4,w5\n\tmov\tw15,wzr\n\tb\t|$L_00_48|\n\n\tALIGN\t16\n|$L_00_48|\n\text8\tv4.16b,v0.16b,v1.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text8\tv7.16b,v2.16b,v3.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v3.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv0.4s,v0.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv0.4s,v0.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv0.4s,v0.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v0.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v0.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v0.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v0.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tad
d\tw8,w8,w11\n\tsli\tv17.4s,v0.4s,#13\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv0.4s,v0.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text8\tv4.16b,v1.16b,v2.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text8\tv7.16b,v3.16b,v0.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v0.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv1.4s,v1.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv1.4s,v1.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4
,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv1.4s,v1.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\tushr\tv18.4s,v1.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v1.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v1.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v1.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v1.4s,#13\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv1.4s,v1.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\text8\tv4.16b,v2.16b,v3.16b,#4\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tbic\tw15,w9,w7\n\text8\tv7.16b,v0.16b,v1.16b,#4\n\teor\tw11,w7,w7,ror#5\n\tadd\tw3,w3,w13\n\tmov\td19,v1.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w3,w3,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw10,w10,w12\n\tadd\tv2.4s,v2.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w4\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\tadd\tv2.4s,v2.4s,v5.4s\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tadd\tv2.4s,v2.4s,v17.4s\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\tushr\tv18.4s,v2.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v2.4s,#10\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tsli\tv18.4s,v2.4s,#15\n\tadd\tw8,w8,w12\n\tushr\tv17.4s,v2.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tsli\tv17.4s,v2.4s,#13\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], 
#16\n\tadd\tw4,w4,w8\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w10\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\tadd\tv2.4s,v2.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], #16\n\text8\tv4.16b,v3.16b,v0.16b,#4\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tbic\tw15,w5,w3\n\text8\tv7.16b,v1.16b,v2.16b,#4\n\teor\tw11,w3,w3,ror#5\n\tadd\tw7,w7,w13\n\tmov\td19,v2.d[1]\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\tushr\tv6.4s,v4.4s,#7\n\teor\tw15,w7,w7,ror#11\n\tushr\tv5.4s,v4.4s,#3\n\tadd\tw6,w6,w12\n\tadd\tv3.4s,v3.4s,v7.4s\n\tror\tw11,w11,#6\n\tsli\tv6.4s,v4.4s,#25\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tushr\tv7.4s,v4.4s,#18\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\teor\tv5.16b,v5.16b,v6.16b\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\tsli\tv7.4s,v4.4s,#14\n\teor\tw14,w14,w8\n\tushr\tv16.4s,v19.4s,#17\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\teor\tv5.16b,v5.16b,v7.16b\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tsli\tv16.4s,v19.4s,#15\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\tushr\tv17.4s,v19.4s,#10\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tushr\tv7.4s,v19.4s,#19\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\tadd\tv3.4s,v3.4s,v5.4s\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tsli\tv7.4s,v19.4s,#13\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\teor\tv17.16b,v17.16b,v16.16b\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\teor\tv17.16b,v17.16b,v7.16b\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tadd\tv3.4s,v3.4s,v17.4s\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\
tw5,w5,w13\n\tushr\tv18.4s,v3.4s,#17\n\torr\tw12,w12,w15\n\tushr\tv19.4s,v3.4s,#10\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tsli\tv18.4s,v3.4s,#15\n\tadd\tw4,w4,w12\n\tushr\tv17.4s,v3.4s,#19\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tv19.16b,v19.16b,v18.16b\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tsli\tv17.4s,v3.4s,#13\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tld1\t{v4.4s},[x16], #16\n\tadd\tw8,w8,w4\n\teor\tv19.16b,v19.16b,v17.16b\n\teor\tw14,w14,w6\n\teor\tv17.16b,v17.16b,v17.16b\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tmov\tv17.d[1],v19.d[0]\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\tadd\tv3.4s,v3.4s,v17.4s\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[x16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], #16\n\tcmp\tw12,#0\n\tldr\tw12,[sp,#0]\n\tsub\tx17,x17,#64\n\tbne\t|$L_00_48|\n\n\tsub\tx16,x16,#256\n\tcmp\tx1,x2\n\tmov\tx17, 
#-64\n\tcseleq\tx17,x17,xzr\n\tadd\tx1,x1,x17\n\tmov\tx17,sp\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v0.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv0.16b,v0.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v0.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#4]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#8]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#12]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#16]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v1.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv1.16b,v1.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v1.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#20]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#24]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#28]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tldr\tw12,[sp,#32]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw10,w10,w12\n\tadd\tw3,w3,w15\n\tand\tw12,w8,w7\n\tld1\t{v2.16b},[x1],#16\n\tbic\tw15,w9,w7\n\teor\tw11,w7,w7,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw3,w3,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w7,ror#19\n\teor\tw15,w3,w3,ror#11\n\trev32\tv2.16b,v2.16b\n\tadd\tw10,w10,w12\n\tror\tw11,w11,#6\n\teor\tw13,w3,w4\n\teor\tw15,w15,w3,ror#20\n\tadd\tv4.4s,v4.4s,v2.4s\n\tadd\tw10,w10,w11\n\tldr\tw12,[sp,#36]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw6,w6,w10\n\teor\tw14,w14,w4\n\tadd\tw9,w9,w12\n\tadd\tw10,w10,w15\n\tand\tw12,w7,w6\n\tbic\tw15,w8,w6\n\teor\tw11,w6,w6,ror#5\n\tadd\tw10,w10,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w6,ror#19\n\teor\tw15,w10,w10,ror#11\n\tadd\tw9,w9,w12\n\tror\tw11,w11,#6\n\teor\tw14,w10,w3\n\teor\tw15,w15,w10,ror#20\n\tadd\tw9,w9,w11\n\tldr\tw12,[sp,#40]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw5,w5,w9\n\teor\tw13,w13,w3\n\tadd\tw8,w8,w12\n\tadd\tw9,w9,w15\n\tand\tw12,w6,w5\n\tbic\tw15,w7,w5\n\teor\tw11,w5,w5,ror#5\n\tadd\tw9,w9,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w5,ror#19\n\teor\tw15,w9,w9,ror#11\n\tadd\tw8,w8,w12\n\tror\tw11,w11,#6\n\teor\tw13,w9,w10\n\teor\tw15,w15,w9,ror#20\n\tadd\tw8,w8,w11\n\tldr\tw12,[sp,#44]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw4,w4,w8\n\teor\tw14,w14,w10\n\tadd\tw7,w7,w12\n\tadd\tw8,w8,w15\n\tand\tw12,w5,w4\n\tbic\tw15,w6,w4\n\teor\tw11,w4,w4,ror#5\n\tadd\tw8,w8,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w4,ror#19\n\teor\tw15,w8,w8,ror#11\n\tadd\tw7,w7,w12\n\tror\tw11,w11,#6\n\teor\tw14,w8,w9\n\teor\tw15,w15,w8,ror#20\n\tadd\tw7,w7,w11\n\tldr\tw12,[sp,#48]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw3,w3,w7\n\teor\tw13,w13,w9\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw6,w6,w12\n\tadd\tw7,w7,w15\n\tand\tw12,w4,w3\n\tld1\t{v3.16b},[x1],#16\n\tbic\tw15,w5,w3\n\teor\tw11,w3,w3,ror#5\n\tld1\t{v4.4s},[x16],#16\n\tadd\tw7,w7,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w3,ror#19\n\teor\tw15,w7,w7,ror#11\n\trev32\tv3.16b,v3.16b\n\tadd\tw6,w6,w12\n\tror\tw11,w11,#6\n\teor\tw13,w7,w8\n\teor\tw15,w15,w7,ror#20\n\tadd\tv4.4s,v4.4s,v3.4s\n\tadd\tw6,w6,w11\n\tldr\tw12,[sp,#52]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw10,w10,w6\n\teor\tw14,w14,w8\n\tadd\tw5,w5,w12\n\tadd\tw6,w6,w15\n\tand\tw12,w3,w10\n\tbic\tw15,w4,w10\n\teor\tw11,w10,w10,ror#5\n\tadd\tw6,w6,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w10,ror#19\n\teor\tw15,w6,w6,ror#11\n\tadd\tw5,w5,w12\n\tror\tw11,w11,#6\n\teor\tw14,w6,w7\n\teor\tw15,w15,w6,ror#20\n\tadd\tw5,w5,w11\n\tldr\tw12,[sp,#56]\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw9,w9,w5\n\teor\tw13,w13,w7\n\tadd\tw4,w4,w12\n\tadd\tw5,w5,w15\n\tand\tw12,w10,w9\n\tbic\tw15,w3,w9\n\teor\tw11,w9,w9,ror#5\n\tadd\tw5,w5,w13\n\torr\tw12,w12,w15\n\teor\tw11,w11,w9,ror#19\n\teor\tw15,w5,w5,ror#11\n\tadd\tw4,w4,w12\n\tror\tw11,w11,#6\n\teor\tw13,w5,w6\n\teor\tw15,w15,w5,ror#20\n\tadd\tw4,w4,w11\n\tldr\tw12,[sp,#60]\n\tand\tw14,w14,w13\n\tror\tw15,w15,#2\n\tadd\tw8,w8,w4\n\teor\tw14,w14,w6\n\tadd\tw3,w3,w12\n\tadd\tw4,w4,w15\n\tand\tw12,w9,w8\n\tbic\tw15,w10,w8\n\teor\tw11,w8,w8,ror#5\n\tadd\tw4,w4,w14\n\torr\tw12,w12,w15\n\teor\tw11,w11,w8,ror#19\n\teor\tw15,w4,w4,ror#11\n\tadd\tw3,w3,w12\n\tror\tw11,w11,#6\n\teor\tw14,w4,w5\n\teor\tw15,w15,w4,ror#20\n\tadd\tw3,w3,w11\n\tand\tw13,w13,w14\n\tror\tw15,w15,#2\n\tadd\tw7,w7,w3\n\teor\tw13,w13,w5\n\tst1\t{v4.4s},[x17], 
#16\n\tadd\tw3,w3,w15\n\tldp\tw11,w12,[x0,#0]\n\tadd\tw3,w3,w13\n\tldp\tw13,w14,[x0,#8]\n\tadd\tw3,w3,w11\n\tadd\tw4,w4,w12\n\tldp\tw11,w12,[x0,#16]\n\tadd\tw5,w5,w13\n\tadd\tw6,w6,w14\n\tldp\tw13,w14,[x0,#24]\n\tadd\tw7,w7,w11\n\tadd\tw8,w8,w12\n\tldr\tw12,[sp,#0]\n\tstp\tw3,w4,[x0,#0]\n\tadd\tw9,w9,w13\n\tmov\tw13,wzr\n\tstp\tw5,w6,[x0,#8]\n\tadd\tw10,w10,w14\n\tstp\tw7,w8,[x0,#16]\n\teor\tw14,w4,w5\n\tstp\tw9,w10,[x0,#24]\n\tmov\tw15,wzr\n\tmov\tx17,sp\n\tbne\t|$L_00_48|\n\n\tldr\tx29,[x29]\n\tadd\tsp,sp,#16*4+2*__SIZEOF_POINTER__\n\tret\n\tENDP\n\n\n\tEXPORT\t|blst_sha256_emit|[FUNC]\n\tALIGN\t16\n|blst_sha256_emit| PROC\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n if :lnot::def:\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n endif\n\tstr\tw4,[x0,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[x0,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[x0,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[x0,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[x0,#0]\n\tstr\tw5,[x0,#8]\n\tstr\tw6,[x0,#16]\n\tstr\tw7,[x0,#24]\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|blst_sha256_bcopy|[FUNC]\n\tALIGN\t16\n|blst_sha256_bcopy| PROC\n\thint\t#34\n|$Loop_bcopy|\n\tldrb\tw3,[x1],#1\n\tsub\tx2,x2,#1\n\tstrb\tw3,[x0],#1\n\tcbnz\tx2,|$Loop_bcopy|\n\tret\n\tENDP\n\n\n\n\tEXPORT\t|blst_sha256_hcopy|[FUNC]\n\tALIGN\t16\n|blst_sha256_hcopy| PROC\n\thint\t#34\n\tldp\tx4,x5,[x1]\n\tldp\tx6,x7,[x1,#16]\n\tstp\tx4,x5,[x0]\n\tstp\tx6,x7,[x0,#16]\n\tret\n\tENDP\n\tEND\n"
  },
  {
    "path": "build/win64/sha256-x86_64.asm",
    "content": "OPTION\tDOTNAME\n_DATA\tSEGMENT\nCOMM\t__blst_platform_cap:DWORD:1\n\n_DATA\tENDS\n.rdata\tSEGMENT READONLY ALIGN(256)\nALIGN\t64\n\nK256::\n\tDD\t0428a2f98h,071374491h,0b5c0fbcfh,0e9b5dba5h\n\tDD\t03956c25bh,059f111f1h,0923f82a4h,0ab1c5ed5h\n\tDD\t0d807aa98h,012835b01h,0243185beh,0550c7dc3h\n\tDD\t072be5d74h,080deb1feh,09bdc06a7h,0c19bf174h\n\tDD\t0e49b69c1h,0efbe4786h,00fc19dc6h,0240ca1cch\n\tDD\t02de92c6fh,04a7484aah,05cb0a9dch,076f988dah\n\tDD\t0983e5152h,0a831c66dh,0b00327c8h,0bf597fc7h\n\tDD\t0c6e00bf3h,0d5a79147h,006ca6351h,014292967h\n\tDD\t027b70a85h,02e1b2138h,04d2c6dfch,053380d13h\n\tDD\t0650a7354h,0766a0abbh,081c2c92eh,092722c85h\n\tDD\t0a2bfe8a1h,0a81a664bh,0c24b8b70h,0c76c51a3h\n\tDD\t0d192e819h,0d6990624h,0f40e3585h,0106aa070h\n\tDD\t019a4c116h,01e376c08h,02748774ch,034b0bcb5h\n\tDD\t0391c0cb3h,04ed8aa4ah,05b9cca4fh,0682e6ff3h\n\tDD\t0748f82eeh,078a5636fh,084c87814h,08cc70208h\n\tDD\t090befffah,0a4506cebh,0bef9a3f7h,0c67178f2h\n\n\tDD\t000010203h,004050607h,008090a0bh,00c0d0e0fh\n\tDD\t003020100h,00b0a0908h,0ffffffffh,0ffffffffh\n\tDD\t0ffffffffh,0ffffffffh,003020100h,00b0a0908h\nDB\t83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97\nDB\t110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54\nDB\t52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121\nDB\t32,64,100,111,116,45,97,115,109,0\n.rdata\tENDS\n.text$\tSEGMENT ALIGN(256) 'CODE'\nPUBLIC\tblst_sha256_block_data_order_shaext\n\n\nALIGN\t64\nblst_sha256_block_data_order_shaext\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_blst_sha256_block_data_order_shaext::\n\n\n\tpush\trbp\n\n\tmov\trbp,rsp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\n$L$blst_sha256_block_data_order$2::\n\tsub\trsp,050h\n\n\tmovaps\tXMMWORD PTR[(-80)+rbp],xmm6\n\tmovaps\tXMMWORD PTR[(-64)+rbp],xmm7\n\tmovaps\tXMMWORD PTR[(-48)+rbp],xmm8\n\tmovaps\tXMMWORD PTR[(-32)+rbp],xmm9\n\tmovaps\tXMMWORD 
PTR[(-16)+rbp],xmm10\n\n$L$SEH_body_blst_sha256_block_data_order_shaext::\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tlea\trcx,QWORD PTR[((K256+128))]\n\tmovdqu\txmm1,XMMWORD PTR[rdi]\n\tmovdqu\txmm2,XMMWORD PTR[16+rdi]\n\tmovdqa\txmm7,XMMWORD PTR[((256-128))+rcx]\n\n\tpshufd\txmm0,xmm1,01bh\n\tpshufd\txmm1,xmm1,0b1h\n\tpshufd\txmm2,xmm2,01bh\n\tmovdqa\txmm8,xmm7\nDB\t102,15,58,15,202,8\n\tpunpcklqdq\txmm2,xmm0\n\tjmp\t$L$oop_shaext\n\nALIGN\t16\n$L$oop_shaext::\n\tmovdqu\txmm3,XMMWORD PTR[rsi]\n\tmovdqu\txmm4,XMMWORD PTR[16+rsi]\n\tmovdqu\txmm5,XMMWORD PTR[32+rsi]\nDB\t102,15,56,0,223\n\tmovdqu\txmm6,XMMWORD PTR[48+rsi]\n\n\tmovdqa\txmm0,XMMWORD PTR[((0-128))+rcx]\n\tpaddd\txmm0,xmm3\nDB\t102,15,56,0,231\n\tmovdqa\txmm10,xmm2\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tnop\n\tmovdqa\txmm9,xmm1\nDB\t15,56,203,202\n\n\tmovdqa\txmm0,XMMWORD PTR[((16-128))+rcx]\n\tpaddd\txmm0,xmm4\nDB\t102,15,56,0,239\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tlea\trsi,QWORD PTR[64+rsi]\nDB\t15,56,204,220\nDB\t15,56,203,202\n\n\tmovdqa\txmm0,XMMWORD PTR[((32-128))+rcx]\n\tpaddd\txmm0,xmm5\nDB\t102,15,56,0,247\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm6\nDB\t102,15,58,15,253,4\n\tnop\n\tpaddd\txmm3,xmm7\nDB\t15,56,204,229\nDB\t15,56,203,202\n\n\tmovdqa\txmm0,XMMWORD PTR[((48-128))+rcx]\n\tpaddd\txmm0,xmm6\nDB\t15,56,205,222\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm3\nDB\t102,15,58,15,254,4\n\tnop\n\tpaddd\txmm4,xmm7\nDB\t15,56,204,238\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((64-128))+rcx]\n\tpaddd\txmm0,xmm3\nDB\t15,56,205,227\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm4\nDB\t102,15,58,15,251,4\n\tnop\n\tpaddd\txmm5,xmm7\nDB\t15,56,204,243\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD 
PTR[((80-128))+rcx]\n\tpaddd\txmm0,xmm4\nDB\t15,56,205,236\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm5\nDB\t102,15,58,15,252,4\n\tnop\n\tpaddd\txmm6,xmm7\nDB\t15,56,204,220\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((96-128))+rcx]\n\tpaddd\txmm0,xmm5\nDB\t15,56,205,245\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm6\nDB\t102,15,58,15,253,4\n\tnop\n\tpaddd\txmm3,xmm7\nDB\t15,56,204,229\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((112-128))+rcx]\n\tpaddd\txmm0,xmm6\nDB\t15,56,205,222\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm3\nDB\t102,15,58,15,254,4\n\tnop\n\tpaddd\txmm4,xmm7\nDB\t15,56,204,238\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((128-128))+rcx]\n\tpaddd\txmm0,xmm3\nDB\t15,56,205,227\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm4\nDB\t102,15,58,15,251,4\n\tnop\n\tpaddd\txmm5,xmm7\nDB\t15,56,204,243\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((144-128))+rcx]\n\tpaddd\txmm0,xmm4\nDB\t15,56,205,236\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm5\nDB\t102,15,58,15,252,4\n\tnop\n\tpaddd\txmm6,xmm7\nDB\t15,56,204,220\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((160-128))+rcx]\n\tpaddd\txmm0,xmm5\nDB\t15,56,205,245\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm6\nDB\t102,15,58,15,253,4\n\tnop\n\tpaddd\txmm3,xmm7\nDB\t15,56,204,229\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((176-128))+rcx]\n\tpaddd\txmm0,xmm6\nDB\t15,56,205,222\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm3\nDB\t102,15,58,15,254,4\n\tnop\n\tpaddd\txmm4,xmm7\nDB\t15,56,204,238\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD PTR[((192-128))+rcx]\n\tpaddd\txmm0,xmm3\nDB\t15,56,205,227\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm4\nDB\t102,15,58,15,251,4\n\tnop\n\tpaddd\txmm5,xmm7\nDB\t15,56,204,243\nDB\t15,56,203,202\n\tmovdqa\txmm0,XMMWORD 
PTR[((208-128))+rcx]\n\tpaddd\txmm0,xmm4\nDB\t15,56,205,236\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tmovdqa\txmm7,xmm5\nDB\t102,15,58,15,252,4\nDB\t15,56,203,202\n\tpaddd\txmm6,xmm7\n\n\tmovdqa\txmm0,XMMWORD PTR[((224-128))+rcx]\n\tpaddd\txmm0,xmm5\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\nDB\t15,56,205,245\n\tmovdqa\txmm7,xmm8\nDB\t15,56,203,202\n\n\tmovdqa\txmm0,XMMWORD PTR[((240-128))+rcx]\n\tpaddd\txmm0,xmm6\n\tnop\nDB\t15,56,203,209\n\tpshufd\txmm0,xmm0,00eh\n\tdec\trdx\n\tnop\nDB\t15,56,203,202\n\n\tpaddd\txmm2,xmm10\n\tpaddd\txmm1,xmm9\n\tjnz\t$L$oop_shaext\n\n\tpshufd\txmm2,xmm2,0b1h\n\tpshufd\txmm7,xmm1,01bh\n\tpshufd\txmm1,xmm1,0b1h\n\tpunpckhqdq\txmm1,xmm2\nDB\t102,15,58,15,215,8\n\n\tmovdqu\tXMMWORD PTR[rdi],xmm1\n\tmovdqu\tXMMWORD PTR[16+rdi],xmm2\n\tmovaps\txmm6,XMMWORD PTR[((-80))+rbp]\n\tmovaps\txmm7,XMMWORD PTR[((-64))+rbp]\n\tmovaps\txmm8,XMMWORD PTR[((-48))+rbp]\n\tmovaps\txmm9,XMMWORD PTR[((-32))+rbp]\n\tmovaps\txmm10,XMMWORD PTR[((-16))+rbp]\n\tmov\trsp,rbp\n\n\tpop\trbp\n\n$L$SEH_epilogue_blst_sha256_block_data_order_shaext::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_blst_sha256_block_data_order_shaext::\nblst_sha256_block_data_order_shaext\tENDP\nPUBLIC\tblst_sha256_block_data_order\n\n\nALIGN\t64\nblst_sha256_block_data_order\tPROC PUBLIC\n\tDB\t243,15,30,250\n\tmov\tQWORD PTR[8+rsp],rdi\t;WIN64 prologue\n\tmov\tQWORD PTR[16+rsp],rsi\n\tmov\tr11,rsp\n$L$SEH_begin_blst_sha256_block_data_order::\n\n\n\tpush\trbp\n\n\tmov\trbp,rsp\n\n\tmov\trdi,rcx\n\tmov\trsi,rdx\n\tmov\trdx,r8\nifndef\t__SGX_LVI_HARDENING__\n\ttest\tDWORD PTR[__blst_platform_cap],2\n\tjnz\t$L$blst_sha256_block_data_order$2\nendif\n\tpush\trbx\n\n\tpush\tr12\n\n\tpush\tr13\n\n\tpush\tr14\n\n\tpush\tr15\n\n\tshl\trdx,4\n\tsub\trsp,88\n\n\tlea\trdx,QWORD PTR[rdx*4+rsi]\n\tmov\tQWORD 
PTR[((-64))+rbp],rdi\n\n\tmov\tQWORD PTR[((-48))+rbp],rdx\n\tmovaps\tXMMWORD PTR[(-128)+rbp],xmm6\n\tmovaps\tXMMWORD PTR[(-112)+rbp],xmm7\n\tmovaps\tXMMWORD PTR[(-96)+rbp],xmm8\n\tmovaps\tXMMWORD PTR[(-80)+rbp],xmm9\n\n$L$SEH_body_blst_sha256_block_data_order::\n\n\n\tlea\trsp,QWORD PTR[((-64))+rsp]\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\teax,DWORD PTR[rdi]\n\tand\trsp,-64\n\tmov\tebx,DWORD PTR[4+rdi]\n\tmov\tecx,DWORD PTR[8+rdi]\n\tmov\tedx,DWORD PTR[12+rdi]\n\tmov\tr8d,DWORD PTR[16+rdi]\n\tmov\tr9d,DWORD PTR[20+rdi]\n\tmov\tr10d,DWORD PTR[24+rdi]\n\tmov\tr11d,DWORD PTR[28+rdi]\n\n\n\tjmp\t$L$loop_ssse3\nALIGN\t16\n$L$loop_ssse3::\n\tmovdqa\txmm7,XMMWORD PTR[((K256+256))]\n\tmov\tQWORD PTR[((-56))+rbp],rsi\n\tmovdqu\txmm0,XMMWORD PTR[rsi]\n\tmovdqu\txmm1,XMMWORD PTR[16+rsi]\n\tmovdqu\txmm2,XMMWORD PTR[32+rsi]\nDB\t102,15,56,0,199\n\tmovdqu\txmm3,XMMWORD PTR[48+rsi]\n\tlea\trsi,QWORD PTR[K256]\nDB\t102,15,56,0,207\n\tmovdqa\txmm4,XMMWORD PTR[rsi]\n\tmovdqa\txmm5,XMMWORD PTR[16+rsi]\nDB\t102,15,56,0,215\n\tpaddd\txmm4,xmm0\n\tmovdqa\txmm6,XMMWORD PTR[32+rsi]\nDB\t102,15,56,0,223\n\tmovdqa\txmm7,XMMWORD PTR[48+rsi]\n\tpaddd\txmm5,xmm1\n\tpaddd\txmm6,xmm2\n\tpaddd\txmm7,xmm3\n\tmovdqa\tXMMWORD PTR[rsp],xmm4\n\tmov\tr14d,eax\n\tmovdqa\tXMMWORD PTR[16+rsp],xmm5\n\tmov\tedi,ebx\n\tmovdqa\tXMMWORD PTR[32+rsp],xmm6\n\txor\tedi,ecx\n\tmovdqa\tXMMWORD PTR[48+rsp],xmm7\n\tmov\tr13d,r8d\n\tjmp\t$L$ssse3_00_47\n\nALIGN\t16\n$L$ssse3_00_47::\n\tsub\trsi,-64\n\tror\tr13d,14\n\tmovdqa\txmm4,xmm1\n\tmov\teax,r14d\n\tmov\tr12d,r9d\n\tmovdqa\txmm7,xmm3\n\tror\tr14d,9\n\txor\tr13d,r8d\n\txor\tr12d,r10d\n\tror\tr13d,5\n\txor\tr14d,eax\nDB\t102,15,58,15,224,4\n\tand\tr12d,r8d\n\txor\tr13d,r8d\nDB\t102,15,58,15,250,4\n\tadd\tr11d,DWORD 
PTR[rsp]\n\tmov\tr15d,eax\n\txor\tr12d,r10d\n\tror\tr14d,11\n\tmovdqa\txmm5,xmm4\n\txor\tr15d,ebx\n\tadd\tr11d,r12d\n\tmovdqa\txmm6,xmm4\n\tror\tr13d,6\n\tand\tedi,r15d\n\tpsrld\txmm4,3\n\txor\tr14d,eax\n\tadd\tr11d,r13d\n\txor\tedi,ebx\n\tpaddd\txmm0,xmm7\n\tror\tr14d,2\n\tadd\tedx,r11d\n\tpsrld\txmm6,7\n\tadd\tr11d,edi\n\tmov\tr13d,edx\n\tpshufd\txmm7,xmm3,250\n\tadd\tr14d,r11d\n\tror\tr13d,14\n\tpslld\txmm5,14\n\tmov\tr11d,r14d\n\tmov\tr12d,r8d\n\tpxor\txmm4,xmm6\n\tror\tr14d,9\n\txor\tr13d,edx\n\txor\tr12d,r9d\n\tror\tr13d,5\n\tpsrld\txmm6,11\n\txor\tr14d,r11d\n\tpxor\txmm4,xmm5\n\tand\tr12d,edx\n\txor\tr13d,edx\n\tpslld\txmm5,11\n\tadd\tr10d,DWORD PTR[4+rsp]\n\tmov\tedi,r11d\n\tpxor\txmm4,xmm6\n\txor\tr12d,r9d\n\tror\tr14d,11\n\tmovdqa\txmm6,xmm7\n\txor\tedi,eax\n\tadd\tr10d,r12d\n\tpxor\txmm4,xmm5\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r11d\n\tpsrld\txmm7,10\n\tadd\tr10d,r13d\n\txor\tr15d,eax\n\tpaddd\txmm0,xmm4\n\tror\tr14d,2\n\tadd\tecx,r10d\n\tpsrlq\txmm6,17\n\tadd\tr10d,r15d\n\tmov\tr13d,ecx\n\tadd\tr14d,r10d\n\tpxor\txmm7,xmm6\n\tror\tr13d,14\n\tmov\tr10d,r14d\n\tmov\tr12d,edx\n\tror\tr14d,9\n\tpsrlq\txmm6,2\n\txor\tr13d,ecx\n\txor\tr12d,r8d\n\tpxor\txmm7,xmm6\n\tror\tr13d,5\n\txor\tr14d,r10d\n\tand\tr12d,ecx\n\tpshufd\txmm7,xmm7,128\n\txor\tr13d,ecx\n\tadd\tr9d,DWORD PTR[8+rsp]\n\tmov\tr15d,r10d\n\tpsrldq\txmm7,8\n\txor\tr12d,r8d\n\tror\tr14d,11\n\txor\tr15d,r11d\n\tadd\tr9d,r12d\n\tror\tr13d,6\n\tpaddd\txmm0,xmm7\n\tand\tedi,r15d\n\txor\tr14d,r10d\n\tadd\tr9d,r13d\n\tpshufd\txmm7,xmm0,80\n\txor\tedi,r11d\n\tror\tr14d,2\n\tadd\tebx,r9d\n\tmovdqa\txmm6,xmm7\n\tadd\tr9d,edi\n\tmov\tr13d,ebx\n\tpsrld\txmm7,10\n\tadd\tr14d,r9d\n\tror\tr13d,14\n\tpsrlq\txmm6,17\n\tmov\tr9d,r14d\n\tmov\tr12d,ecx\n\tpxor\txmm7,xmm6\n\tror\tr14d,9\n\txor\tr13d,ebx\n\txor\tr12d,edx\n\tror\tr13d,5\n\txor\tr14d,r9d\n\tpsrlq\txmm6,2\n\tand\tr12d,ebx\n\txor\tr13d,ebx\n\tadd\tr8d,DWORD 
PTR[12+rsp]\n\tpxor\txmm7,xmm6\n\tmov\tedi,r9d\n\txor\tr12d,edx\n\tror\tr14d,11\n\tpshufd\txmm7,xmm7,8\n\txor\tedi,r10d\n\tadd\tr8d,r12d\n\tmovdqa\txmm6,XMMWORD PTR[rsi]\n\tror\tr13d,6\n\tand\tr15d,edi\n\tpslldq\txmm7,8\n\txor\tr14d,r9d\n\tadd\tr8d,r13d\n\txor\tr15d,r10d\n\tpaddd\txmm0,xmm7\n\tror\tr14d,2\n\tadd\teax,r8d\n\tadd\tr8d,r15d\n\tpaddd\txmm6,xmm0\n\tmov\tr13d,eax\n\tadd\tr14d,r8d\n\tmovdqa\tXMMWORD PTR[rsp],xmm6\n\tror\tr13d,14\n\tmovdqa\txmm4,xmm2\n\tmov\tr8d,r14d\n\tmov\tr12d,ebx\n\tmovdqa\txmm7,xmm0\n\tror\tr14d,9\n\txor\tr13d,eax\n\txor\tr12d,ecx\n\tror\tr13d,5\n\txor\tr14d,r8d\nDB\t102,15,58,15,225,4\n\tand\tr12d,eax\n\txor\tr13d,eax\nDB\t102,15,58,15,251,4\n\tadd\tedx,DWORD PTR[16+rsp]\n\tmov\tr15d,r8d\n\txor\tr12d,ecx\n\tror\tr14d,11\n\tmovdqa\txmm5,xmm4\n\txor\tr15d,r9d\n\tadd\tedx,r12d\n\tmovdqa\txmm6,xmm4\n\tror\tr13d,6\n\tand\tedi,r15d\n\tpsrld\txmm4,3\n\txor\tr14d,r8d\n\tadd\tedx,r13d\n\txor\tedi,r9d\n\tpaddd\txmm1,xmm7\n\tror\tr14d,2\n\tadd\tr11d,edx\n\tpsrld\txmm6,7\n\tadd\tedx,edi\n\tmov\tr13d,r11d\n\tpshufd\txmm7,xmm0,250\n\tadd\tr14d,edx\n\tror\tr13d,14\n\tpslld\txmm5,14\n\tmov\tedx,r14d\n\tmov\tr12d,eax\n\tpxor\txmm4,xmm6\n\tror\tr14d,9\n\txor\tr13d,r11d\n\txor\tr12d,ebx\n\tror\tr13d,5\n\tpsrld\txmm6,11\n\txor\tr14d,edx\n\tpxor\txmm4,xmm5\n\tand\tr12d,r11d\n\txor\tr13d,r11d\n\tpslld\txmm5,11\n\tadd\tecx,DWORD 
PTR[20+rsp]\n\tmov\tedi,edx\n\tpxor\txmm4,xmm6\n\txor\tr12d,ebx\n\tror\tr14d,11\n\tmovdqa\txmm6,xmm7\n\txor\tedi,r8d\n\tadd\tecx,r12d\n\tpxor\txmm4,xmm5\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,edx\n\tpsrld\txmm7,10\n\tadd\tecx,r13d\n\txor\tr15d,r8d\n\tpaddd\txmm1,xmm4\n\tror\tr14d,2\n\tadd\tr10d,ecx\n\tpsrlq\txmm6,17\n\tadd\tecx,r15d\n\tmov\tr13d,r10d\n\tadd\tr14d,ecx\n\tpxor\txmm7,xmm6\n\tror\tr13d,14\n\tmov\tecx,r14d\n\tmov\tr12d,r11d\n\tror\tr14d,9\n\tpsrlq\txmm6,2\n\txor\tr13d,r10d\n\txor\tr12d,eax\n\tpxor\txmm7,xmm6\n\tror\tr13d,5\n\txor\tr14d,ecx\n\tand\tr12d,r10d\n\tpshufd\txmm7,xmm7,128\n\txor\tr13d,r10d\n\tadd\tebx,DWORD PTR[24+rsp]\n\tmov\tr15d,ecx\n\tpsrldq\txmm7,8\n\txor\tr12d,eax\n\tror\tr14d,11\n\txor\tr15d,edx\n\tadd\tebx,r12d\n\tror\tr13d,6\n\tpaddd\txmm1,xmm7\n\tand\tedi,r15d\n\txor\tr14d,ecx\n\tadd\tebx,r13d\n\tpshufd\txmm7,xmm1,80\n\txor\tedi,edx\n\tror\tr14d,2\n\tadd\tr9d,ebx\n\tmovdqa\txmm6,xmm7\n\tadd\tebx,edi\n\tmov\tr13d,r9d\n\tpsrld\txmm7,10\n\tadd\tr14d,ebx\n\tror\tr13d,14\n\tpsrlq\txmm6,17\n\tmov\tebx,r14d\n\tmov\tr12d,r10d\n\tpxor\txmm7,xmm6\n\tror\tr14d,9\n\txor\tr13d,r9d\n\txor\tr12d,r11d\n\tror\tr13d,5\n\txor\tr14d,ebx\n\tpsrlq\txmm6,2\n\tand\tr12d,r9d\n\txor\tr13d,r9d\n\tadd\teax,DWORD PTR[28+rsp]\n\tpxor\txmm7,xmm6\n\tmov\tedi,ebx\n\txor\tr12d,r11d\n\tror\tr14d,11\n\tpshufd\txmm7,xmm7,8\n\txor\tedi,ecx\n\tadd\teax,r12d\n\tmovdqa\txmm6,XMMWORD PTR[16+rsi]\n\tror\tr13d,6\n\tand\tr15d,edi\n\tpslldq\txmm7,8\n\txor\tr14d,ebx\n\tadd\teax,r13d\n\txor\tr15d,ecx\n\tpaddd\txmm1,xmm7\n\tror\tr14d,2\n\tadd\tr8d,eax\n\tadd\teax,r15d\n\tpaddd\txmm6,xmm1\n\tmov\tr13d,r8d\n\tadd\tr14d,eax\n\tmovdqa\tXMMWORD PTR[16+rsp],xmm6\n\tror\tr13d,14\n\tmovdqa\txmm4,xmm3\n\tmov\teax,r14d\n\tmov\tr12d,r9d\n\tmovdqa\txmm7,xmm1\n\tror\tr14d,9\n\txor\tr13d,r8d\n\txor\tr12d,r10d\n\tror\tr13d,5\n\txor\tr14d,eax\nDB\t102,15,58,15,226,4\n\tand\tr12d,r8d\n\txor\tr13d,r8d\nDB\t102,15,58,15,248,4\n\tadd\tr11d,DWORD 
PTR[32+rsp]\n\tmov\tr15d,eax\n\txor\tr12d,r10d\n\tror\tr14d,11\n\tmovdqa\txmm5,xmm4\n\txor\tr15d,ebx\n\tadd\tr11d,r12d\n\tmovdqa\txmm6,xmm4\n\tror\tr13d,6\n\tand\tedi,r15d\n\tpsrld\txmm4,3\n\txor\tr14d,eax\n\tadd\tr11d,r13d\n\txor\tedi,ebx\n\tpaddd\txmm2,xmm7\n\tror\tr14d,2\n\tadd\tedx,r11d\n\tpsrld\txmm6,7\n\tadd\tr11d,edi\n\tmov\tr13d,edx\n\tpshufd\txmm7,xmm1,250\n\tadd\tr14d,r11d\n\tror\tr13d,14\n\tpslld\txmm5,14\n\tmov\tr11d,r14d\n\tmov\tr12d,r8d\n\tpxor\txmm4,xmm6\n\tror\tr14d,9\n\txor\tr13d,edx\n\txor\tr12d,r9d\n\tror\tr13d,5\n\tpsrld\txmm6,11\n\txor\tr14d,r11d\n\tpxor\txmm4,xmm5\n\tand\tr12d,edx\n\txor\tr13d,edx\n\tpslld\txmm5,11\n\tadd\tr10d,DWORD PTR[36+rsp]\n\tmov\tedi,r11d\n\tpxor\txmm4,xmm6\n\txor\tr12d,r9d\n\tror\tr14d,11\n\tmovdqa\txmm6,xmm7\n\txor\tedi,eax\n\tadd\tr10d,r12d\n\tpxor\txmm4,xmm5\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r11d\n\tpsrld\txmm7,10\n\tadd\tr10d,r13d\n\txor\tr15d,eax\n\tpaddd\txmm2,xmm4\n\tror\tr14d,2\n\tadd\tecx,r10d\n\tpsrlq\txmm6,17\n\tadd\tr10d,r15d\n\tmov\tr13d,ecx\n\tadd\tr14d,r10d\n\tpxor\txmm7,xmm6\n\tror\tr13d,14\n\tmov\tr10d,r14d\n\tmov\tr12d,edx\n\tror\tr14d,9\n\tpsrlq\txmm6,2\n\txor\tr13d,ecx\n\txor\tr12d,r8d\n\tpxor\txmm7,xmm6\n\tror\tr13d,5\n\txor\tr14d,r10d\n\tand\tr12d,ecx\n\tpshufd\txmm7,xmm7,128\n\txor\tr13d,ecx\n\tadd\tr9d,DWORD PTR[40+rsp]\n\tmov\tr15d,r10d\n\tpsrldq\txmm7,8\n\txor\tr12d,r8d\n\tror\tr14d,11\n\txor\tr15d,r11d\n\tadd\tr9d,r12d\n\tror\tr13d,6\n\tpaddd\txmm2,xmm7\n\tand\tedi,r15d\n\txor\tr14d,r10d\n\tadd\tr9d,r13d\n\tpshufd\txmm7,xmm2,80\n\txor\tedi,r11d\n\tror\tr14d,2\n\tadd\tebx,r9d\n\tmovdqa\txmm6,xmm7\n\tadd\tr9d,edi\n\tmov\tr13d,ebx\n\tpsrld\txmm7,10\n\tadd\tr14d,r9d\n\tror\tr13d,14\n\tpsrlq\txmm6,17\n\tmov\tr9d,r14d\n\tmov\tr12d,ecx\n\tpxor\txmm7,xmm6\n\tror\tr14d,9\n\txor\tr13d,ebx\n\txor\tr12d,edx\n\tror\tr13d,5\n\txor\tr14d,r9d\n\tpsrlq\txmm6,2\n\tand\tr12d,ebx\n\txor\tr13d,ebx\n\tadd\tr8d,DWORD 
PTR[44+rsp]\n\tpxor\txmm7,xmm6\n\tmov\tedi,r9d\n\txor\tr12d,edx\n\tror\tr14d,11\n\tpshufd\txmm7,xmm7,8\n\txor\tedi,r10d\n\tadd\tr8d,r12d\n\tmovdqa\txmm6,XMMWORD PTR[32+rsi]\n\tror\tr13d,6\n\tand\tr15d,edi\n\tpslldq\txmm7,8\n\txor\tr14d,r9d\n\tadd\tr8d,r13d\n\txor\tr15d,r10d\n\tpaddd\txmm2,xmm7\n\tror\tr14d,2\n\tadd\teax,r8d\n\tadd\tr8d,r15d\n\tpaddd\txmm6,xmm2\n\tmov\tr13d,eax\n\tadd\tr14d,r8d\n\tmovdqa\tXMMWORD PTR[32+rsp],xmm6\n\tror\tr13d,14\n\tmovdqa\txmm4,xmm0\n\tmov\tr8d,r14d\n\tmov\tr12d,ebx\n\tmovdqa\txmm7,xmm2\n\tror\tr14d,9\n\txor\tr13d,eax\n\txor\tr12d,ecx\n\tror\tr13d,5\n\txor\tr14d,r8d\nDB\t102,15,58,15,227,4\n\tand\tr12d,eax\n\txor\tr13d,eax\nDB\t102,15,58,15,249,4\n\tadd\tedx,DWORD PTR[48+rsp]\n\tmov\tr15d,r8d\n\txor\tr12d,ecx\n\tror\tr14d,11\n\tmovdqa\txmm5,xmm4\n\txor\tr15d,r9d\n\tadd\tedx,r12d\n\tmovdqa\txmm6,xmm4\n\tror\tr13d,6\n\tand\tedi,r15d\n\tpsrld\txmm4,3\n\txor\tr14d,r8d\n\tadd\tedx,r13d\n\txor\tedi,r9d\n\tpaddd\txmm3,xmm7\n\tror\tr14d,2\n\tadd\tr11d,edx\n\tpsrld\txmm6,7\n\tadd\tedx,edi\n\tmov\tr13d,r11d\n\tpshufd\txmm7,xmm2,250\n\tadd\tr14d,edx\n\tror\tr13d,14\n\tpslld\txmm5,14\n\tmov\tedx,r14d\n\tmov\tr12d,eax\n\tpxor\txmm4,xmm6\n\tror\tr14d,9\n\txor\tr13d,r11d\n\txor\tr12d,ebx\n\tror\tr13d,5\n\tpsrld\txmm6,11\n\txor\tr14d,edx\n\tpxor\txmm4,xmm5\n\tand\tr12d,r11d\n\txor\tr13d,r11d\n\tpslld\txmm5,11\n\tadd\tecx,DWORD 
PTR[52+rsp]\n\tmov\tedi,edx\n\tpxor\txmm4,xmm6\n\txor\tr12d,ebx\n\tror\tr14d,11\n\tmovdqa\txmm6,xmm7\n\txor\tedi,r8d\n\tadd\tecx,r12d\n\tpxor\txmm4,xmm5\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,edx\n\tpsrld\txmm7,10\n\tadd\tecx,r13d\n\txor\tr15d,r8d\n\tpaddd\txmm3,xmm4\n\tror\tr14d,2\n\tadd\tr10d,ecx\n\tpsrlq\txmm6,17\n\tadd\tecx,r15d\n\tmov\tr13d,r10d\n\tadd\tr14d,ecx\n\tpxor\txmm7,xmm6\n\tror\tr13d,14\n\tmov\tecx,r14d\n\tmov\tr12d,r11d\n\tror\tr14d,9\n\tpsrlq\txmm6,2\n\txor\tr13d,r10d\n\txor\tr12d,eax\n\tpxor\txmm7,xmm6\n\tror\tr13d,5\n\txor\tr14d,ecx\n\tand\tr12d,r10d\n\tpshufd\txmm7,xmm7,128\n\txor\tr13d,r10d\n\tadd\tebx,DWORD PTR[56+rsp]\n\tmov\tr15d,ecx\n\tpsrldq\txmm7,8\n\txor\tr12d,eax\n\tror\tr14d,11\n\txor\tr15d,edx\n\tadd\tebx,r12d\n\tror\tr13d,6\n\tpaddd\txmm3,xmm7\n\tand\tedi,r15d\n\txor\tr14d,ecx\n\tadd\tebx,r13d\n\tpshufd\txmm7,xmm3,80\n\txor\tedi,edx\n\tror\tr14d,2\n\tadd\tr9d,ebx\n\tmovdqa\txmm6,xmm7\n\tadd\tebx,edi\n\tmov\tr13d,r9d\n\tpsrld\txmm7,10\n\tadd\tr14d,ebx\n\tror\tr13d,14\n\tpsrlq\txmm6,17\n\tmov\tebx,r14d\n\tmov\tr12d,r10d\n\tpxor\txmm7,xmm6\n\tror\tr14d,9\n\txor\tr13d,r9d\n\txor\tr12d,r11d\n\tror\tr13d,5\n\txor\tr14d,ebx\n\tpsrlq\txmm6,2\n\tand\tr12d,r9d\n\txor\tr13d,r9d\n\tadd\teax,DWORD PTR[60+rsp]\n\tpxor\txmm7,xmm6\n\tmov\tedi,ebx\n\txor\tr12d,r11d\n\tror\tr14d,11\n\tpshufd\txmm7,xmm7,8\n\txor\tedi,ecx\n\tadd\teax,r12d\n\tmovdqa\txmm6,XMMWORD PTR[48+rsi]\n\tror\tr13d,6\n\tand\tr15d,edi\n\tpslldq\txmm7,8\n\txor\tr14d,ebx\n\tadd\teax,r13d\n\txor\tr15d,ecx\n\tpaddd\txmm3,xmm7\n\tror\tr14d,2\n\tadd\tr8d,eax\n\tadd\teax,r15d\n\tpaddd\txmm6,xmm3\n\tmov\tr13d,r8d\n\tadd\tr14d,eax\n\tmovdqa\tXMMWORD PTR[48+rsp],xmm6\n\tcmp\tBYTE PTR[67+rsi],0\n\tjne\t$L$ssse3_00_47\n\tror\tr13d,14\n\tmov\teax,r14d\n\tmov\tr12d,r9d\n\tror\tr14d,9\n\txor\tr13d,r8d\n\txor\tr12d,r10d\n\tror\tr13d,5\n\txor\tr14d,eax\n\tand\tr12d,r8d\n\txor\tr13d,r8d\n\tadd\tr11d,DWORD 
PTR[rsp]\n\tmov\tr15d,eax\n\txor\tr12d,r10d\n\tror\tr14d,11\n\txor\tr15d,ebx\n\tadd\tr11d,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,eax\n\tadd\tr11d,r13d\n\txor\tedi,ebx\n\tror\tr14d,2\n\tadd\tedx,r11d\n\tadd\tr11d,edi\n\tmov\tr13d,edx\n\tadd\tr14d,r11d\n\tror\tr13d,14\n\tmov\tr11d,r14d\n\tmov\tr12d,r8d\n\tror\tr14d,9\n\txor\tr13d,edx\n\txor\tr12d,r9d\n\tror\tr13d,5\n\txor\tr14d,r11d\n\tand\tr12d,edx\n\txor\tr13d,edx\n\tadd\tr10d,DWORD PTR[4+rsp]\n\tmov\tedi,r11d\n\txor\tr12d,r9d\n\tror\tr14d,11\n\txor\tedi,eax\n\tadd\tr10d,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r11d\n\tadd\tr10d,r13d\n\txor\tr15d,eax\n\tror\tr14d,2\n\tadd\tecx,r10d\n\tadd\tr10d,r15d\n\tmov\tr13d,ecx\n\tadd\tr14d,r10d\n\tror\tr13d,14\n\tmov\tr10d,r14d\n\tmov\tr12d,edx\n\tror\tr14d,9\n\txor\tr13d,ecx\n\txor\tr12d,r8d\n\tror\tr13d,5\n\txor\tr14d,r10d\n\tand\tr12d,ecx\n\txor\tr13d,ecx\n\tadd\tr9d,DWORD PTR[8+rsp]\n\tmov\tr15d,r10d\n\txor\tr12d,r8d\n\tror\tr14d,11\n\txor\tr15d,r11d\n\tadd\tr9d,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,r10d\n\tadd\tr9d,r13d\n\txor\tedi,r11d\n\tror\tr14d,2\n\tadd\tebx,r9d\n\tadd\tr9d,edi\n\tmov\tr13d,ebx\n\tadd\tr14d,r9d\n\tror\tr13d,14\n\tmov\tr9d,r14d\n\tmov\tr12d,ecx\n\tror\tr14d,9\n\txor\tr13d,ebx\n\txor\tr12d,edx\n\tror\tr13d,5\n\txor\tr14d,r9d\n\tand\tr12d,ebx\n\txor\tr13d,ebx\n\tadd\tr8d,DWORD PTR[12+rsp]\n\tmov\tedi,r9d\n\txor\tr12d,edx\n\tror\tr14d,11\n\txor\tedi,r10d\n\tadd\tr8d,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r9d\n\tadd\tr8d,r13d\n\txor\tr15d,r10d\n\tror\tr14d,2\n\tadd\teax,r8d\n\tadd\tr8d,r15d\n\tmov\tr13d,eax\n\tadd\tr14d,r8d\n\tror\tr13d,14\n\tmov\tr8d,r14d\n\tmov\tr12d,ebx\n\tror\tr14d,9\n\txor\tr13d,eax\n\txor\tr12d,ecx\n\tror\tr13d,5\n\txor\tr14d,r8d\n\tand\tr12d,eax\n\txor\tr13d,eax\n\tadd\tedx,DWORD 
PTR[16+rsp]\n\tmov\tr15d,r8d\n\txor\tr12d,ecx\n\tror\tr14d,11\n\txor\tr15d,r9d\n\tadd\tedx,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,r8d\n\tadd\tedx,r13d\n\txor\tedi,r9d\n\tror\tr14d,2\n\tadd\tr11d,edx\n\tadd\tedx,edi\n\tmov\tr13d,r11d\n\tadd\tr14d,edx\n\tror\tr13d,14\n\tmov\tedx,r14d\n\tmov\tr12d,eax\n\tror\tr14d,9\n\txor\tr13d,r11d\n\txor\tr12d,ebx\n\tror\tr13d,5\n\txor\tr14d,edx\n\tand\tr12d,r11d\n\txor\tr13d,r11d\n\tadd\tecx,DWORD PTR[20+rsp]\n\tmov\tedi,edx\n\txor\tr12d,ebx\n\tror\tr14d,11\n\txor\tedi,r8d\n\tadd\tecx,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,edx\n\tadd\tecx,r13d\n\txor\tr15d,r8d\n\tror\tr14d,2\n\tadd\tr10d,ecx\n\tadd\tecx,r15d\n\tmov\tr13d,r10d\n\tadd\tr14d,ecx\n\tror\tr13d,14\n\tmov\tecx,r14d\n\tmov\tr12d,r11d\n\tror\tr14d,9\n\txor\tr13d,r10d\n\txor\tr12d,eax\n\tror\tr13d,5\n\txor\tr14d,ecx\n\tand\tr12d,r10d\n\txor\tr13d,r10d\n\tadd\tebx,DWORD PTR[24+rsp]\n\tmov\tr15d,ecx\n\txor\tr12d,eax\n\tror\tr14d,11\n\txor\tr15d,edx\n\tadd\tebx,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,ecx\n\tadd\tebx,r13d\n\txor\tedi,edx\n\tror\tr14d,2\n\tadd\tr9d,ebx\n\tadd\tebx,edi\n\tmov\tr13d,r9d\n\tadd\tr14d,ebx\n\tror\tr13d,14\n\tmov\tebx,r14d\n\tmov\tr12d,r10d\n\tror\tr14d,9\n\txor\tr13d,r9d\n\txor\tr12d,r11d\n\tror\tr13d,5\n\txor\tr14d,ebx\n\tand\tr12d,r9d\n\txor\tr13d,r9d\n\tadd\teax,DWORD PTR[28+rsp]\n\tmov\tedi,ebx\n\txor\tr12d,r11d\n\tror\tr14d,11\n\txor\tedi,ecx\n\tadd\teax,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,ebx\n\tadd\teax,r13d\n\txor\tr15d,ecx\n\tror\tr14d,2\n\tadd\tr8d,eax\n\tadd\teax,r15d\n\tmov\tr13d,r8d\n\tadd\tr14d,eax\n\tror\tr13d,14\n\tmov\teax,r14d\n\tmov\tr12d,r9d\n\tror\tr14d,9\n\txor\tr13d,r8d\n\txor\tr12d,r10d\n\tror\tr13d,5\n\txor\tr14d,eax\n\tand\tr12d,r8d\n\txor\tr13d,r8d\n\tadd\tr11d,DWORD 
PTR[32+rsp]\n\tmov\tr15d,eax\n\txor\tr12d,r10d\n\tror\tr14d,11\n\txor\tr15d,ebx\n\tadd\tr11d,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,eax\n\tadd\tr11d,r13d\n\txor\tedi,ebx\n\tror\tr14d,2\n\tadd\tedx,r11d\n\tadd\tr11d,edi\n\tmov\tr13d,edx\n\tadd\tr14d,r11d\n\tror\tr13d,14\n\tmov\tr11d,r14d\n\tmov\tr12d,r8d\n\tror\tr14d,9\n\txor\tr13d,edx\n\txor\tr12d,r9d\n\tror\tr13d,5\n\txor\tr14d,r11d\n\tand\tr12d,edx\n\txor\tr13d,edx\n\tadd\tr10d,DWORD PTR[36+rsp]\n\tmov\tedi,r11d\n\txor\tr12d,r9d\n\tror\tr14d,11\n\txor\tedi,eax\n\tadd\tr10d,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r11d\n\tadd\tr10d,r13d\n\txor\tr15d,eax\n\tror\tr14d,2\n\tadd\tecx,r10d\n\tadd\tr10d,r15d\n\tmov\tr13d,ecx\n\tadd\tr14d,r10d\n\tror\tr13d,14\n\tmov\tr10d,r14d\n\tmov\tr12d,edx\n\tror\tr14d,9\n\txor\tr13d,ecx\n\txor\tr12d,r8d\n\tror\tr13d,5\n\txor\tr14d,r10d\n\tand\tr12d,ecx\n\txor\tr13d,ecx\n\tadd\tr9d,DWORD PTR[40+rsp]\n\tmov\tr15d,r10d\n\txor\tr12d,r8d\n\tror\tr14d,11\n\txor\tr15d,r11d\n\tadd\tr9d,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,r10d\n\tadd\tr9d,r13d\n\txor\tedi,r11d\n\tror\tr14d,2\n\tadd\tebx,r9d\n\tadd\tr9d,edi\n\tmov\tr13d,ebx\n\tadd\tr14d,r9d\n\tror\tr13d,14\n\tmov\tr9d,r14d\n\tmov\tr12d,ecx\n\tror\tr14d,9\n\txor\tr13d,ebx\n\txor\tr12d,edx\n\tror\tr13d,5\n\txor\tr14d,r9d\n\tand\tr12d,ebx\n\txor\tr13d,ebx\n\tadd\tr8d,DWORD PTR[44+rsp]\n\tmov\tedi,r9d\n\txor\tr12d,edx\n\tror\tr14d,11\n\txor\tedi,r10d\n\tadd\tr8d,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,r9d\n\tadd\tr8d,r13d\n\txor\tr15d,r10d\n\tror\tr14d,2\n\tadd\teax,r8d\n\tadd\tr8d,r15d\n\tmov\tr13d,eax\n\tadd\tr14d,r8d\n\tror\tr13d,14\n\tmov\tr8d,r14d\n\tmov\tr12d,ebx\n\tror\tr14d,9\n\txor\tr13d,eax\n\txor\tr12d,ecx\n\tror\tr13d,5\n\txor\tr14d,r8d\n\tand\tr12d,eax\n\txor\tr13d,eax\n\tadd\tedx,DWORD 
PTR[48+rsp]\n\tmov\tr15d,r8d\n\txor\tr12d,ecx\n\tror\tr14d,11\n\txor\tr15d,r9d\n\tadd\tedx,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,r8d\n\tadd\tedx,r13d\n\txor\tedi,r9d\n\tror\tr14d,2\n\tadd\tr11d,edx\n\tadd\tedx,edi\n\tmov\tr13d,r11d\n\tadd\tr14d,edx\n\tror\tr13d,14\n\tmov\tedx,r14d\n\tmov\tr12d,eax\n\tror\tr14d,9\n\txor\tr13d,r11d\n\txor\tr12d,ebx\n\tror\tr13d,5\n\txor\tr14d,edx\n\tand\tr12d,r11d\n\txor\tr13d,r11d\n\tadd\tecx,DWORD PTR[52+rsp]\n\tmov\tedi,edx\n\txor\tr12d,ebx\n\tror\tr14d,11\n\txor\tedi,r8d\n\tadd\tecx,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,edx\n\tadd\tecx,r13d\n\txor\tr15d,r8d\n\tror\tr14d,2\n\tadd\tr10d,ecx\n\tadd\tecx,r15d\n\tmov\tr13d,r10d\n\tadd\tr14d,ecx\n\tror\tr13d,14\n\tmov\tecx,r14d\n\tmov\tr12d,r11d\n\tror\tr14d,9\n\txor\tr13d,r10d\n\txor\tr12d,eax\n\tror\tr13d,5\n\txor\tr14d,ecx\n\tand\tr12d,r10d\n\txor\tr13d,r10d\n\tadd\tebx,DWORD PTR[56+rsp]\n\tmov\tr15d,ecx\n\txor\tr12d,eax\n\tror\tr14d,11\n\txor\tr15d,edx\n\tadd\tebx,r12d\n\tror\tr13d,6\n\tand\tedi,r15d\n\txor\tr14d,ecx\n\tadd\tebx,r13d\n\txor\tedi,edx\n\tror\tr14d,2\n\tadd\tr9d,ebx\n\tadd\tebx,edi\n\tmov\tr13d,r9d\n\tadd\tr14d,ebx\n\tror\tr13d,14\n\tmov\tebx,r14d\n\tmov\tr12d,r10d\n\tror\tr14d,9\n\txor\tr13d,r9d\n\txor\tr12d,r11d\n\tror\tr13d,5\n\txor\tr14d,ebx\n\tand\tr12d,r9d\n\txor\tr13d,r9d\n\tadd\teax,DWORD PTR[60+rsp]\n\tmov\tedi,ebx\n\txor\tr12d,r11d\n\tror\tr14d,11\n\txor\tedi,ecx\n\tadd\teax,r12d\n\tror\tr13d,6\n\tand\tr15d,edi\n\txor\tr14d,ebx\n\tadd\teax,r13d\n\txor\tr15d,ecx\n\tror\tr14d,2\n\tadd\tr8d,eax\n\tadd\teax,r15d\n\tmov\tr13d,r8d\n\tadd\tr14d,eax\n\tmov\trdi,QWORD PTR[((-64))+rbp]\n\tmov\teax,r14d\n\tmov\trsi,QWORD PTR[((-56))+rbp]\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tadd\teax,DWORD PTR[rdi]\n\tadd\tebx,DWORD PTR[4+rdi]\n\tadd\tecx,DWORD PTR[8+rdi]\n\tadd\tedx,DWORD PTR[12+rdi]\n\tadd\tr8d,DWORD PTR[16+rdi]\n\tadd\tr9d,DWORD PTR[20+rdi]\n\tadd\tr10d,DWORD PTR[24+rdi]\n\tadd\tr11d,DWORD PTR[28+rdi]\n\n\tlea\trsi,QWORD 
PTR[64+rsi]\n\tcmp\trsi,QWORD PTR[((-48))+rbp]\n\n\tmov\tDWORD PTR[rdi],eax\n\tmov\tDWORD PTR[4+rdi],ebx\n\tmov\tDWORD PTR[8+rdi],ecx\n\tmov\tDWORD PTR[12+rdi],edx\n\tmov\tDWORD PTR[16+rdi],r8d\n\tmov\tDWORD PTR[20+rdi],r9d\n\tmov\tDWORD PTR[24+rdi],r10d\n\tmov\tDWORD PTR[28+rdi],r11d\n\tjb\t$L$loop_ssse3\n\n\txorps\txmm0,xmm0\n\tmovaps\tXMMWORD PTR[rsp],xmm0\n\tmovaps\tXMMWORD PTR[16+rsp],xmm0\n\tmovaps\tXMMWORD PTR[32+rsp],xmm0\n\tmovaps\tXMMWORD PTR[48+rsp],xmm0\n\tmovaps\txmm6,XMMWORD PTR[((-128))+rbp]\n\tmovaps\txmm7,XMMWORD PTR[((-112))+rbp]\n\tmovaps\txmm8,XMMWORD PTR[((-96))+rbp]\n\tmovaps\txmm9,XMMWORD PTR[((-80))+rbp]\n\tmov\tr15,QWORD PTR[((-40))+rbp]\n\tmov\tr14,QWORD PTR[((-32))+rbp]\n\tmov\tr13,QWORD PTR[((-24))+rbp]\n\tmov\tr12,QWORD PTR[((-16))+rbp]\n\tmov\trbx,QWORD PTR[((-8))+rbp]\n\tmov\trsp,rbp\n\n\tpop\trbp\n\n$L$SEH_epilogue_blst_sha256_block_data_order::\n\tmov\trdi,QWORD PTR[8+rsp]\t;WIN64 epilogue\n\tmov\trsi,QWORD PTR[16+rsp]\n\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\n\n$L$SEH_end_blst_sha256_block_data_order::\nblst_sha256_block_data_order\tENDP\nPUBLIC\tblst_sha256_emit\n\n\nALIGN\t16\nblst_sha256_emit\tPROC PUBLIC\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rdx]\n\tmov\tr9,QWORD PTR[8+rdx]\n\tmov\tr10,QWORD PTR[16+rdx]\n\tbswap\tr8\n\tmov\tr11,QWORD PTR[24+rdx]\n\tbswap\tr9\n\tmov\tDWORD PTR[4+rcx],r8d\n\tbswap\tr10\n\tmov\tDWORD PTR[12+rcx],r9d\n\tbswap\tr11\n\tmov\tDWORD PTR[20+rcx],r10d\n\tshr\tr8,32\n\tmov\tDWORD PTR[28+rcx],r11d\n\tshr\tr9,32\n\tmov\tDWORD PTR[rcx],r8d\n\tshr\tr10,32\n\tmov\tDWORD PTR[8+rcx],r9d\n\tshr\tr11,32\n\tmov\tDWORD PTR[16+rcx],r10d\n\tmov\tDWORD PTR[24+rcx],r11d\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nblst_sha256_emit\tENDP\n\nPUBLIC\tblst_sha256_bcopy\n\n\nALIGN\t16\nblst_sha256_bcopy\tPROC 
PUBLIC\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tsub\trcx,rdx\n$L$oop_bcopy::\n\tmovzx\teax,BYTE PTR[rdx]\n\tlea\trdx,QWORD PTR[1+rdx]\n\tmov\tBYTE PTR[((-1))+rdx*1+rcx],al\n\tdec\tr8\n\tjnz\t$L$oop_bcopy\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nblst_sha256_bcopy\tENDP\n\nPUBLIC\tblst_sha256_hcopy\n\n\nALIGN\t16\nblst_sha256_hcopy\tPROC PUBLIC\n\tDB\t243,15,30,250\n\nifdef\t__SGX_LVI_HARDENING__\n\tlfence\nendif\n\tmov\tr8,QWORD PTR[rdx]\n\tmov\tr9,QWORD PTR[8+rdx]\n\tmov\tr10,QWORD PTR[16+rdx]\n\tmov\tr11,QWORD PTR[24+rdx]\n\tmov\tQWORD PTR[rcx],r8\n\tmov\tQWORD PTR[8+rcx],r9\n\tmov\tQWORD PTR[16+rcx],r10\n\tmov\tQWORD PTR[24+rcx],r11\n\t\nifdef\t__SGX_LVI_HARDENING__\n\tpop\trdx\n\tlfence\n\tjmp\trdx\n\tud2\nelse\n\tDB\t0F3h,0C3h\nendif\nblst_sha256_hcopy\tENDP\n.text$\tENDS\n.pdata\tSEGMENT READONLY ALIGN(4)\nALIGN\t4\n\tDD\timagerel $L$SEH_begin_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_body_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_shaext_prologue\n\n\tDD\timagerel $L$SEH_body_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_epilogue_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_shaext_body\n\n\tDD\timagerel $L$SEH_epilogue_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_end_blst_sha256_block_data_order_shaext\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_shaext_epilogue\n\n\tDD\timagerel $L$SEH_begin_blst_sha256_block_data_order\n\tDD\timagerel $L$SEH_body_blst_sha256_block_data_order\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_prologue\n\n\tDD\timagerel $L$SEH_body_blst_sha256_block_data_order\n\tDD\timagerel $L$SEH_epilogue_blst_sha256_block_data_order\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_body\n\n\tDD\timagerel $L$SEH_epilogue_blst_sha256_block_data_order\n\tDD\timagerel 
$L$SEH_end_blst_sha256_block_data_order\n\tDD\timagerel $L$SEH_info_blst_sha256_block_data_order_epilogue\n\n.pdata\tENDS\n.xdata\tSEGMENT READONLY ALIGN(8)\nALIGN\t8\n$L$SEH_info_blst_sha256_block_data_order_shaext_prologue::\nDB\t1,4,6,005h\nDB\t4,074h,2,0\nDB\t4,064h,3,0\nDB\t4,053h\nDB\t1,050h\n\tDD\t0,0\n$L$SEH_info_blst_sha256_block_data_order_shaext_body::\nDB\t1,0,17,85\nDB\t000h,068h,000h,000h\nDB\t000h,078h,001h,000h\nDB\t000h,088h,002h,000h\nDB\t000h,098h,003h,000h\nDB\t000h,0a8h,004h,000h\nDB\t000h,074h,00ch,000h\nDB\t000h,064h,00dh,000h\nDB\t000h,053h\nDB\t000h,092h\nDB\t000h,050h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_blst_sha256_block_data_order_shaext_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n$L$SEH_info_blst_sha256_block_data_order_prologue::\nDB\t1,4,6,005h\nDB\t4,074h,2,0\nDB\t4,064h,3,0\nDB\t4,053h\nDB\t1,050h\n\tDD\t0,0\n$L$SEH_info_blst_sha256_block_data_order_body::\nDB\t1,0,25,133\nDB\t000h,068h,000h,000h\nDB\t000h,078h,001h,000h\nDB\t000h,088h,002h,000h\nDB\t000h,098h,003h,000h\nDB\t000h,0f4h,00bh,000h\nDB\t000h,0e4h,00ch,000h\nDB\t000h,0d4h,00dh,000h\nDB\t000h,0c4h,00eh,000h\nDB\t000h,034h,00fh,000h\nDB\t000h,074h,012h,000h\nDB\t000h,064h,013h,000h\nDB\t000h,053h\nDB\t000h,0f2h\nDB\t000h,050h\nDB\t000h,000h,000h,000h,000h,000h\nDB\t000h,000h,000h,000h\n$L$SEH_info_blst_sha256_block_data_order_epilogue::\nDB\t1,0,4,0\nDB\t000h,074h,001h,000h\nDB\t000h,064h,002h,000h\nDB\t000h,000h,000h,000h\n\n\n.xdata\tENDS\nEND\n"
  },
  {
    "path": "build.bat",
    "content": "@echo off\nSETLOCAL\nset PATH=%windir%\\system32;%PATH% &:: override msys if it's on the PATH\nset TOP=%~dp0\nset CFLAGS=/nologo /c /O2 /Zi /Fdblst.pdb /W4\ncl 2>&1 | find \"for ARM64\" > nul:\nIF ERRORLEVEL 1 (\n    set arm64=no\n    FOR %%F IN (%TOP%build\\win64\\*-x86_64.asm) DO (\n        ml64 /nologo /c /Cp /Cx /Zi %%F || EXIT /B\n    )\n) ELSE (\n    set arm64=yes\n    FOR %%F IN (%TOP%build\\win64\\*-armv8.asm) DO (\n        armasm64 -nologo %%F || EXIT /B\n    )\n)\nSETLOCAL ENABLEDELAYEDEXPANSION\nset static=/out:blst.lib\nset shared=\nset arm64x=\nFOR %%O IN (%*) DO (\n    set opt=%%O\n    IF \"!opt!\" == \"-shared\" (\n        IF [!shared!] EQU [] set shared=/out:blst.dll\n    ) ELSE IF \"!opt!\" == \"-dll\" (\n        IF [!shared!] EQU [] set shared=/out:blst.dll\n    ) ELSE IF \"!opt:~0,5!\" == \"/out:\" (\n\tIF \"!opt:~-4!\" == \".dll\" (set shared=!opt!) ELSE (set static=!opt!)\n    ) ELSE IF \"!opt!\" == \"-arm64x\" (\n        set arm64x=%arm64%\n    )\n)\nIF [%shared%] NEQ [] (\n    cl %CFLAGS% /MD /D__BLST_DLL_MAIN__ %TOP%src\\server.c || EXIT /B\n    set ld=\n    FOR /F \"usebackq delims=\" %%F IN (`where link`) DO (\n        IF \"!ld!\" == \"\" (\n            \"%%F\" 2>&1 | find \"Linker\" > nul:\n            IF !ERRORLEVEL! EQU 0 set ld=\"%%F\"\n        )\n    )\n    IF [%arm64x%] NEQ [yes] (\n        !ld! /nologo /debug /dll /entry:DllMain /incremental:no %shared% ^\n             /def:%TOP%build\\win64\\blst.def *.obj kernel32.lib && del *.obj\n    ) ELSE (\n        lib /nologo /out:blst_arm64.lib *.obj && del *.obj || EXIT /B\n        FOR %%F IN (%TOP%build\\win64\\*-armv8.asm) DO (\n            armasm64 -nologo -machine arm64ec -nowarn %%F || EXIT /B\n        )\n        cl /arm64EC %CFLAGS% /MD /D__BLST_DLL_MAIN__ %TOP%src\\server.c || EXIT /B\n        !ld! 
/nologo /machine:arm64x /dll /noentry %shared% ^\n             /def:%TOP%build\\win64\\blst.def *.obj ^\n             /defArm64Native:%TOP%build\\win64\\blst.def blst_arm64.lib ^\n             kernel32.lib && del *.obj blst_arm64.lib\n    )\n) ELSE (\n    cl %CFLAGS% /MT /Zl %TOP%src\\server.c || EXIT /B\n    lib /nologo %static% *.obj && del *.obj\n)\nENDLOCAL\nEXIT /B\n"
  },
  {
    "path": "build.sh",
    "content": "#!/bin/sh\nset -e\n#\n# The script allows to override 'CC', 'CFLAGS' and 'flavour' at command\n# line, as well as specify additional compiler flags. For example to\n# compile for x32:\n#\n#\t/some/where/build.sh flavour=elf32 -mx32\n#\n# To cross-compile for mingw/Windows:\n#\n#\t/some/where/build.sh flavour=mingw64 CC=x86_64-w64-mingw32-gcc\n#\n# In addition script recognizes -shared flag and creates shared library\n# alongside libblst.a.\n#\n# To cross-compile for WebAssembly with Emscripten SDK:\n#\n#\t/some/where/build.sh CROSS_COMPILE=em\n\n[ -d /usr/xpg4/bin ] && PATH=/usr/xpg4/bin:$PATH # Solaris\n\nTOP=`dirname $0`\n\n# if -Werror stands in the way, bypass with -Wno-error on command line,\n# or suppress specific one with -Wno-<problematic-warning>\nCFLAGS=${CFLAGS:--O2 -fno-builtin -fPIC -Wall -Wextra -Werror}\nPERL=${PERL:-perl}\nunset cflags shared dll\n\nwhile [ \"x$1\" != \"x\" ]; do\n    case $1 in\n        -shared)    shared=1;;\n        -dll)       shared=1; dll=\".dll\";;\n        -m*)        CFLAGS=\"$CFLAGS $1\";;\n        -target|-arch)\n                    if expr \"$CFLAGS\" : \".*-arch \" >/dev/null; then\n                        cflags=\"$cflags $1 $2\"\n                    else\n                        CFLAGS=\"$CFLAGS $1 $2\"\n                    fi\n                    shift;;\n        -*target*)  CFLAGS=\"$CFLAGS $1\";;\n        -*)         cflags=\"$cflags $1\";;\n        *=*)        eval \"$1\";;\n    esac\n    shift\ndone\n\nif [ \"x$CC\" = \"x\" ]; then\n    CC=gcc\n    which ${CROSS_COMPILE}cc >/dev/null 2>&1 && CC=cc\nfi\nif which ${CROSS_COMPILE}${CC} >/dev/null 2>&1; then\n    CC=${CROSS_COMPILE}${CC}\nfi\nif [ \"x$CROSS_COMPILE\" = \"x\" ]; then\n    CROSS_COMPILE=`echo $CC |\n                   awk '{ print substr($1,0,match($1,\"-(g?cc|clang)$\")) }' 2>/dev/null`\n    # fix up android prefix...\n    CROSS_COMPILE=`echo $CROSS_COMPILE |\n                   awk '{ off=match($1,\"-android[0-9]+-\");\n               
           if (off) { printf \"%sandroid-\\n\",substr($1,0,off) }\n                          else     { print $1 } }'`\nfi\n\npredefs=`(${CC} ${CFLAGS} -dM -E -x c /dev/null || true) 2>/dev/null`\n\nif [ -z \"${CROSS_COMPILE}${AR}\" ] && echo ${predefs} | grep -q clang; then\n    search_dirs=`${CC} -print-search-dirs  | awk -F= '/^programs:/{print$2}' | \\\n                 (sed -E -e 's/([a-z]):\\\\\\/\\/\\1\\//gi' -e 'y/\\\\\\;/\\/:/' 2>/dev/null || true)`\n    if [ -n \"$search_dirs\" ] && \\\n       env PATH=\"$search_dirs:$PATH\" which llvm-ar > /dev/null 2>&1; then\n        PATH=\"$search_dirs:$PATH\"\n        AR=llvm-ar\n        RANLIB=llvm-ranlib\n    fi\nfi\nAR=${AR:-${CROSS_COMPILE}ar}\nRANLIB=${RANLIB:-${CROSS_COMPILE}ranlib}\n\nif [ -z \"${flavour}\" ]; then\n    if echo ${predefs} | grep -q __APPLE__; then\n        flavour=macosx\n    elif echo ${predefs} | grep -q _WIN32; then\n        flavour=mingw64\n        if [ $shared ]; then\n            cflags=\"$cflags -D__BLST_DLL_MAIN__\"\n        fi\n    else\n        flavour=elf\n    fi\nfi\nif echo ${predefs} | grep -q x86_64; then\n    case `uname -s` in\n        Darwin) if [ \"`sysctl -n hw.optional.adx 2>/dev/null`\" = \"1\" ]; then\n                    cflags=\"-D__ADX__ $cflags\"\n                fi;;\n        *)      if (grep -q -e '^flags.*\\badx\\b' /proc/cpuinfo) 2>/dev/null; then\n                    cflags=\"-D__ADX__ $cflags\"\n                fi;;\n    esac\nfi\nif echo ${predefs} | grep -q __AVX__; then\n    cflags=\"$cflags -mno-avx\" # avoid costly transitions\nfi\nif echo ${predefs} | grep -E -q 'x86_64|aarch64'; then :; else\n    cflags=\"$cflags -D__BLST_NO_ASM__\"\nfi\n\nCFLAGS=\"$CFLAGS $cflags\"\nTMPDIR=${TMPDIR:-/tmp}\n\nrm -f libblst.a\ntrap '[ $? 
-ne 0 ] && rm -f libblst.a; rm -f *.o ${TMPDIR}/*.blst.$$' 0\n\n(set -x; ${CC} ${CFLAGS} -c ${TOP}/src/server.c)\n(set -x; ${CC} ${CFLAGS} -c ${TOP}/build/assembly.S)\n(set -x; ${AR} rc libblst.a *.o)\nwhich ${RANLIB} > /dev/null 2>&1 && (set -x; ${RANLIB} libblst.a)\n\nif [ $shared ]; then\n    case $flavour in\n        macosx) (set -x; ${CC} -dynamiclib -o libblst$dll.dylib \\\n                               -all_load libblst.a ${CFLAGS}); exit 0;;\n        mingw*) sharedlib=\"blst.dll ${TOP}/build/win64/blst.def\"\n                CFLAGS=\"${CFLAGS} -Wl,--entry=DllMain -nostartfiles\";;\n        *)      sharedlib=libblst$dll.so\n                CFLAGS=\"${CFLAGS} -Wl,-Bsymbolic\";;\n    esac\n    (set -x; ${CC} -shared -o $sharedlib \\\n                   -Wl,--whole-archive,libblst.a,--no-whole-archive ${CFLAGS})\nfi\n"
  },
  {
    "path": "build.zig",
    "content": "const std = @import(\"std\");\n\npub fn build(b: *std.Build) void {\n    const target = b.standardTargetOptions(.{});\n    const optimize = b.standardOptimizeOption(.{});\n\n    const mod = b.addModule(\"blst\", .{\n        .root_source_file = b.path(\"bindings/zig/blst.zig\"),\n        .target = target,\n        .optimize = optimize,\n    });\n\n    const lib = b.addLibrary(.{\n        .name = \"blst\",\n        .linkage = .static,\n        .root_module = mod,\n    });\n\n    const cfiles = &[_][]const u8{\n        \"src/server.c\",\n        \"build/assembly.S\",\n    };\n\n    const cflags = &[_][]const u8{\n        \"-O2\", \"-ffreestanding\", \"-D__BLST_PORTABLE__\",\n        \"-D__BLST_NO_ASM__\",\n    };\n\n    switch (target.result.cpu.arch) {\n        .aarch64,\n        .x86_64  => lib.addCSourceFiles(.{\n                        .files = cfiles,\n                        .flags = cflags[0 .. cflags.len-1],\n                    }),\n        else     => lib.addCSourceFiles(.{\n                        .files = cfiles[0 .. cfiles.len-1],\n                        .flags = cflags,\n                    }),\n    }\n    if (target.result.os.tag == .windows) {\n        lib.linkLibC();\n    }\n\n    const tests = b.addTest(.{\n        .root_module = b.createModule(.{\n            .root_source_file = b.path(\"bindings/zig/tests.zig\"),\n            .target = target,\n            .optimize = optimize,\n            .imports = &.{.{ .name = \"blst\", .module = mod }},\n        }),\n    });\n\n    b.step(\"test\", \"Run test[s]\").dependOn(&b.addRunArtifact(tests).step);\n}\n"
  },
  {
    "path": "build.zig.zon",
    "content": ".{\n    .name = .blst,\n    .version = \"0.3.16\",\n    .minimum_zig_version = \"0.14.0\",\n    .paths = .{\n        \"build.zig\",\n        \"build.zig.zon\",\n        \"bindings/zig\",\n        \"src\",\n        \"build\",\n    },\n    .fingerprint = 0xa2dc4dc0d564fc7e,\n}\n"
  },
  {
    "path": "src/aggregate.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n/*\n * Usage pattern on single-processor system is\n *\n * blst_pairing_init(ctx, hash_or_encode, DST);\n * blst_pairing_aggregate_pk_in_g1(ctx, PK[0], aggregated_signature, msg[0]);\n * blst_pairing_aggregate_pk_in_g1(ctx, PK[1], NULL, msg[1]);\n * ...\n * blst_pairing_commit(ctx);\n * blst_pairing_finalverify(ctx, NULL);\n *\n ***********************************************************************\n * Usage pattern on multi-processor system is\n *\n *   blst_pairing_init(pk[0], hash_or_encode, DST);\n *   blst_pairing_init(pk[1], hash_or_encode, DST);\n *   ...\n * start threads each processing an N/nthreads slice of PKs and messages:\n *     blst_pairing_aggregate_pk_in_g1(pk[i], PK[i*n+0], NULL, msg[i*n+0]);\n *     blst_pairing_aggregate_pk_in_g1(pk[i], PK[i*n+1], NULL, msg[i*n+1]);\n *     ...\n *     blst_pairing_commit(pkx);\n *   ...\n * meanwhile in main thread\n *   blst_fp12 gtsig;\n *   blst_aggregated_in_g2(&gtsig, aggregated_signature);\n * join threads and merge their contexts:\n *   blst_pairing_merge(pk[0], pk[1]);\n *   blst_pairing_merge(pk[0], pk[2]);\n *   ...\n *   blst_pairing_finalverify(pk[0], gtsig);\n */\n\n#ifndef N_MAX\n# define N_MAX 8\n#endif\n\ntypedef union { POINTonE1 e1; POINTonE2 e2; } AggregatedSignature;\ntypedef struct {\n    unsigned int ctrl;\n    unsigned int nelems;\n    const void *DST;\n    size_t DST_len;\n    vec384fp12 GT;\n    AggregatedSignature AggrSign;\n    POINTonE2_affine Q[N_MAX];\n    POINTonE1_affine P[N_MAX];\n} PAIRING;\n\nenum { AGGR_UNDEFINED      = 0,\n       AGGR_MIN_SIG        = 1,\n       AGGR_MIN_PK         = 2,\n       AGGR_SIGN_SET       = 0x10,\n       AGGR_GT_SET         = 0x20,\n       AGGR_HASH_OR_ENCODE = 0x40 };\n#define MIN_SIG_OR_PK (AGGR_MIN_SIG | AGGR_MIN_PK)\n\nstatic const size_t sizeof_pairing = 
(sizeof(PAIRING) + 7) & ~(size_t)7;\n\nsize_t blst_pairing_sizeof(void)\n{   return sizeof_pairing;   }\n\nvoid blst_pairing_init(PAIRING *ctx, int hash_or_encode,\n                       const void *DST, size_t DST_len)\n{\n    ctx->ctrl = AGGR_UNDEFINED | (hash_or_encode ? AGGR_HASH_OR_ENCODE : 0);\n    ctx->nelems = 0;\n    ctx->DST = (uptr_t)DST==(uptr_t)((byte *)ctx+sizeof_pairing) ? (void *)42\n                                                                 : DST;\n    ctx->DST_len = DST_len;\n}\n\nstatic const void *pairing_get_dst(const PAIRING *ctx)\n{   return (uptr_t)ctx->DST==(uptr_t)42 ? (const byte *)ctx+sizeof_pairing\n                                        : ctx->DST;\n}\n\nconst void *blst_pairing_get_dst(const PAIRING *ctx)\n{   return pairing_get_dst(ctx);   }\n\n#define FROM_AFFINE(out,in) do { \\\n    vec_copy((out)->X, in->X, 2*sizeof(in->X)), \\\n    vec_select((out)->Z, in->X, BLS12_381_Rx.p, sizeof(in->X), \\\n                         vec_is_zero(in->X, 2*sizeof(in->X))); } while(0)\n\n/*\n * Optional |nbits|-wide |scalar| is used to facilitate multiple aggregated\n * signature verification as discussed at\n * https://ethresear.ch/t/fast-verification-of-multiple-bls-signatures/5407.\n * Usage pattern is not finalized yet, because (sig != NULL) is better and\n * will be handled separately...\n */\nstatic BLST_ERROR PAIRING_Aggregate_PK_in_G2(PAIRING *ctx,\n                                             const POINTonE2_affine *PK,\n                                             size_t pk_groupcheck,\n                                             const POINTonE1_affine *sig,\n                                             size_t sig_groupcheck,\n                                             const byte *scalar, size_t nbits,\n                                             const void *msg, size_t msg_len,\n                                             const void *aug, size_t aug_len)\n{\n    if (ctx->ctrl & AGGR_MIN_PK)\n        return 
BLST_AGGR_TYPE_MISMATCH;\n\n    ctx->ctrl |= AGGR_MIN_SIG;\n\n    /*\n     * Since we don't know if the signature is individual or aggregated,\n     * the only sensible thing to do is to skip over infinite one and\n     * count on the corresponding infinite public key to be rejected,\n     * in case the signature is non-aggregated that is.\n     */\n    if (sig != NULL && !vec_is_zero(sig, sizeof(*sig))) {\n        POINTonE1 *S = &ctx->AggrSign.e1;\n        POINTonE1 P[1];\n\n        FROM_AFFINE(P, sig);\n\n        if (sig_groupcheck && !POINTonE1_in_G1(P))\n            return BLST_POINT_NOT_IN_GROUP;\n\n        if (ctx->ctrl & AGGR_SIGN_SET) {\n            if (nbits != 0 && scalar != NULL) {\n                POINTonE1_mult_w5(P, P, scalar, nbits);\n                POINTonE1_dadd(S, S, P, NULL);\n            } else {\n                POINTonE1_dadd_affine(S, S, sig);\n            }\n        } else {\n            ctx->ctrl |= AGGR_SIGN_SET;\n            if (nbits != 0 && scalar != NULL)\n                POINTonE1_mult_w5(S, P, scalar, nbits);\n            else\n                vec_copy(S, P, sizeof(P));\n        }\n    }\n\n    if (PK != NULL) {\n        unsigned int n;\n        POINTonE1 H[1];\n        const void *DST = pairing_get_dst(ctx);\n\n        /*\n         * Reject infinite public keys.\n         */\n        if (vec_is_zero(PK, sizeof(*PK)))\n            return BLST_PK_IS_INFINITY;\n\n        if (pk_groupcheck) {\n            POINTonE2 P[1];\n\n            FROM_AFFINE(P, PK);\n            if (!POINTonE2_in_G2(P))\n                return BLST_POINT_NOT_IN_GROUP;\n        }\n\n        if (ctx->ctrl & AGGR_HASH_OR_ENCODE)\n            Hash_to_G1(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len);\n        else\n            Encode_to_G1(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len);\n\n        if (nbits != 0 && scalar != NULL)\n            POINTonE1_mult_w5(H, H, scalar, nbits);\n\n        POINTonE1_from_Jacobian(H, H);\n\n        n = ctx->nelems;\n        
vec_copy(ctx->Q + n, PK, sizeof(POINTonE2_affine));\n        vec_copy(ctx->P + n, H, sizeof(POINTonE1_affine));\n        if (++n == N_MAX) {\n            if (ctx->ctrl & AGGR_GT_SET) {\n                vec384fp12 GT;\n                miller_loop_n(GT, ctx->Q, ctx->P, n);\n                mul_fp12(ctx->GT, ctx->GT, GT);\n            } else {\n                miller_loop_n(ctx->GT, ctx->Q, ctx->P, n);\n                ctx->ctrl |= AGGR_GT_SET;\n            }\n            n = 0;\n        }\n        ctx->nelems = n;\n    }\n\n    return BLST_SUCCESS;\n}\n\nBLST_ERROR blst_pairing_aggregate_pk_in_g2(PAIRING *ctx,\n                                           const POINTonE2_affine *PK,\n                                           const POINTonE1_affine *signature,\n                                           const void *msg, size_t msg_len,\n                                           const void *aug, size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G2(ctx, PK, 0, signature, 1, NULL, 0,\n                                      msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g2(PAIRING *ctx,\n                                                 const POINTonE2_affine *PK,\n                                                 const POINTonE1_affine *sig,\n                                                 const byte *scalar,\n                                                 size_t nbits,\n                                                 const void *msg,\n                                                 size_t msg_len,\n                                                 const void *aug,\n                                                 size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G2(ctx, PK, 0, sig, 1, scalar, nbits,\n                                      msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_chk_n_aggr_pk_in_g2(PAIRING *ctx,\n                                            const POINTonE2_affine *PK,\n                               
             size_t pk_grpchk,\n                                            const POINTonE1_affine *signature,\n                                            size_t sig_grpchk,\n                                            const void *msg, size_t msg_len,\n                                            const void *aug, size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G2(ctx, PK, pk_grpchk, signature, sig_grpchk,\n                                      NULL, 0, msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g2(PAIRING *ctx,\n                                                  const POINTonE2_affine *PK,\n                                                  size_t pk_grpchk,\n                                                  const POINTonE1_affine *sig,\n                                                  size_t sig_grpchk,\n                                                  const byte *scalar,\n                                                  size_t nbits,\n                                                  const void *msg,\n                                                  size_t msg_len,\n                                                  const void *aug,\n                                                  size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G2(ctx, PK, pk_grpchk, sig, sig_grpchk,\n                                      scalar, nbits,\n                                      msg, msg_len, aug, aug_len);\n}\n\nstatic BLST_ERROR PAIRING_Aggregate_PK_in_G1(PAIRING *ctx,\n                                             const POINTonE1_affine *PK,\n                                             size_t pk_groupcheck,\n                                             const POINTonE2_affine *sig,\n                                             size_t sig_groupcheck,\n                                             const byte *scalar, size_t nbits,\n                                             const void *msg, size_t msg_len,\n                           
                  const void *aug, size_t aug_len)\n{\n    if (ctx->ctrl & AGGR_MIN_SIG)\n        return BLST_AGGR_TYPE_MISMATCH;\n\n    ctx->ctrl |= AGGR_MIN_PK;\n\n    /*\n     * Since we don't know if the signature is individual or aggregated,\n     * the only sensible thing to do is to skip over infinite one and\n     * count on the corresponding infinite public key to be rejected,\n     * in case the signature is non-aggregated that is.\n     */\n    if (sig != NULL && !vec_is_zero(sig, sizeof(*sig))) {\n        POINTonE2 *S = &ctx->AggrSign.e2;\n        POINTonE2 P[1];\n\n        FROM_AFFINE(P, sig);\n\n        if (sig_groupcheck && !POINTonE2_in_G2(P))\n            return BLST_POINT_NOT_IN_GROUP;\n\n        if (ctx->ctrl & AGGR_SIGN_SET) {\n            if (nbits != 0 && scalar != NULL) {\n\n                POINTonE2_mult_w5(P, P, scalar, nbits);\n                POINTonE2_dadd(S, S, P, NULL);\n            } else {\n                POINTonE2_dadd_affine(S, S, sig);\n            }\n        } else {\n            ctx->ctrl |= AGGR_SIGN_SET;\n            if (nbits != 0 && scalar != NULL)\n                POINTonE2_mult_w5(S, P, scalar, nbits);\n            else\n                vec_copy(S, P, sizeof(P));\n        }\n    }\n\n    if (PK != NULL) {\n        unsigned int n;\n        POINTonE2 H[1];\n        POINTonE1 pk[1];\n        const void *DST = pairing_get_dst(ctx);\n\n        /*\n         * Reject infinite public keys.\n         */\n        if (vec_is_zero(PK, sizeof(*PK)))\n            return BLST_PK_IS_INFINITY;\n\n        if (pk_groupcheck) {\n            POINTonE1 P[1];\n\n            FROM_AFFINE(P, PK);\n            if (!POINTonE1_in_G1(P))\n                return BLST_POINT_NOT_IN_GROUP;\n        }\n\n        if (ctx->ctrl & AGGR_HASH_OR_ENCODE)\n            Hash_to_G2(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len);\n        else\n            Encode_to_G2(H, msg, msg_len, DST, ctx->DST_len, aug, aug_len);\n\n        POINTonE2_from_Jacobian(H, H);\n\n 
       if (nbits != 0 && scalar != NULL) {\n            FROM_AFFINE(pk, PK);\n            POINTonE1_mult_w5(pk, pk, scalar, nbits);\n            POINTonE1_from_Jacobian(pk, pk);\n            PK = (const POINTonE1_affine *)pk;\n        }\n\n        n = ctx->nelems;\n        vec_copy(ctx->Q + n, H, sizeof(POINTonE2_affine));\n        vec_copy(ctx->P + n, PK, sizeof(POINTonE1_affine));\n        if (++n == N_MAX) {\n            if (ctx->ctrl & AGGR_GT_SET) {\n                vec384fp12 GT;\n                miller_loop_n(GT, ctx->Q, ctx->P, n);\n                mul_fp12(ctx->GT, ctx->GT, GT);\n            } else {\n                miller_loop_n(ctx->GT, ctx->Q, ctx->P, n);\n                ctx->ctrl |= AGGR_GT_SET;\n            }\n            n = 0;\n        }\n        ctx->nelems = n;\n    }\n\n    return BLST_SUCCESS;\n}\n\nBLST_ERROR blst_pairing_aggregate_pk_in_g1(PAIRING *ctx,\n                                           const POINTonE1_affine *PK,\n                                           const POINTonE2_affine *signature,\n                                           const void *msg, size_t msg_len,\n                                           const void *aug, size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G1(ctx, PK, 0, signature, 1, NULL, 0,\n                                      msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_mul_n_aggregate_pk_in_g1(PAIRING *ctx,\n                                                 const POINTonE1_affine *PK,\n                                                 const POINTonE2_affine *sig,\n                                                 const byte *scalar,\n                                                 size_t nbits,\n                                                 const void *msg,\n                                                 size_t msg_len,\n                                                 const void *aug,\n                                                 size_t aug_len)\n{   return 
PAIRING_Aggregate_PK_in_G1(ctx, PK, 0, sig, 1, scalar, nbits,\n                                      msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_chk_n_aggr_pk_in_g1(PAIRING *ctx,\n                                            const POINTonE1_affine *PK,\n                                            size_t pk_grpchk,\n                                            const POINTonE2_affine *signature,\n                                            size_t sig_grpchk,\n                                            const void *msg, size_t msg_len,\n                                            const void *aug, size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G1(ctx, PK, pk_grpchk, signature, sig_grpchk,\n                                      NULL, 0, msg, msg_len, aug, aug_len);\n}\n\nBLST_ERROR blst_pairing_chk_n_mul_n_aggr_pk_in_g1(PAIRING *ctx,\n                                                  const POINTonE1_affine *PK,\n                                                  size_t pk_grpchk,\n                                                  const POINTonE2_affine *sig,\n                                                  size_t sig_grpchk,\n                                                  const byte *scalar,\n                                                  size_t nbits,\n                                                  const void *msg,\n                                                  size_t msg_len,\n                                                  const void *aug,\n                                                  size_t aug_len)\n{   return PAIRING_Aggregate_PK_in_G1(ctx, PK, pk_grpchk, sig, sig_grpchk,\n                                      scalar, nbits,\n                                      msg, msg_len, aug, aug_len);\n}\n\nstatic void PAIRING_Commit(PAIRING *ctx)\n{\n    unsigned int n;\n\n    if ((n = ctx->nelems) != 0) {\n        if (ctx->ctrl & AGGR_GT_SET) {\n            vec384fp12 GT;\n            miller_loop_n(GT, ctx->Q, ctx->P, n);\n           
 mul_fp12(ctx->GT, ctx->GT, GT);\n        } else {\n            miller_loop_n(ctx->GT, ctx->Q, ctx->P, n);\n            ctx->ctrl |= AGGR_GT_SET;\n        }\n        ctx->nelems = 0;\n    }\n}\n\nvoid blst_pairing_commit(PAIRING *ctx)\n{   PAIRING_Commit(ctx);   }\n\nBLST_ERROR blst_pairing_merge(PAIRING *ctx, const PAIRING *ctx1)\n{\n    if ((ctx->ctrl & MIN_SIG_OR_PK) != AGGR_UNDEFINED\n        && (ctx1->ctrl & MIN_SIG_OR_PK) != AGGR_UNDEFINED\n        && (ctx->ctrl & ctx1->ctrl & MIN_SIG_OR_PK) == 0)\n        return BLST_AGGR_TYPE_MISMATCH;\n\n    /* context producers are expected to have called blst_pairing_commit */\n    if (ctx->nelems || ctx1->nelems)\n        return BLST_AGGR_TYPE_MISMATCH;\n\n    ctx->ctrl |= ctx1->ctrl & MIN_SIG_OR_PK;\n\n    switch (ctx->ctrl & MIN_SIG_OR_PK) {\n        case AGGR_MIN_SIG:\n            if (ctx->ctrl & ctx1->ctrl & AGGR_SIGN_SET) {\n                POINTonE1_dadd(&ctx->AggrSign.e1, &ctx->AggrSign.e1,\n                                                  &ctx1->AggrSign.e1, NULL);\n            } else if (ctx1->ctrl & AGGR_SIGN_SET) {\n                ctx->ctrl |= AGGR_SIGN_SET;\n                vec_copy(&ctx->AggrSign.e1, &ctx1->AggrSign.e1,\n                         sizeof(ctx->AggrSign.e1));\n            }\n            break;\n        case AGGR_MIN_PK:\n            if (ctx->ctrl & ctx1->ctrl & AGGR_SIGN_SET) {\n                POINTonE2_dadd(&ctx->AggrSign.e2, &ctx->AggrSign.e2,\n                                                  &ctx1->AggrSign.e2, NULL);\n            } else if (ctx1->ctrl & AGGR_SIGN_SET) {\n                ctx->ctrl |= AGGR_SIGN_SET;\n                vec_copy(&ctx->AggrSign.e2, &ctx1->AggrSign.e2,\n                         sizeof(ctx->AggrSign.e2));\n            }\n            break;\n        case AGGR_UNDEFINED:\n            break;\n        default:\n            return BLST_AGGR_TYPE_MISMATCH;\n    }\n\n    if (ctx->ctrl & ctx1->ctrl & AGGR_GT_SET) {\n        mul_fp12(ctx->GT, ctx->GT, ctx1->GT);\n    } 
else if (ctx1->ctrl & AGGR_GT_SET) {\n        ctx->ctrl |= AGGR_GT_SET;\n        vec_copy(ctx->GT, ctx1->GT, sizeof(ctx->GT));\n    }\n\n    return BLST_SUCCESS;\n}\n\nstatic bool_t PAIRING_FinalVerify(const PAIRING *ctx, const vec384fp12 GTsig)\n{\n    vec384fp12 GT;\n\n    if (!(ctx->ctrl & AGGR_GT_SET))\n        return 0;\n\n    if (GTsig != NULL) {\n        vec_copy(GT, GTsig, sizeof(GT));\n    } else if (ctx->ctrl & AGGR_SIGN_SET) {\n        AggregatedSignature AggrSign;\n\n        switch (ctx->ctrl & MIN_SIG_OR_PK) {\n            case AGGR_MIN_SIG:\n                POINTonE1_from_Jacobian(&AggrSign.e1, &ctx->AggrSign.e1);\n                miller_loop_n(GT, (const POINTonE2_affine *)&BLS12_381_G2,\n                                  (const POINTonE1_affine *)&AggrSign.e1, 1);\n                break;\n            case AGGR_MIN_PK:\n                POINTonE2_from_Jacobian(&AggrSign.e2, &ctx->AggrSign.e2);\n                miller_loop_n(GT, (const POINTonE2_affine *)&AggrSign.e2,\n                                  (const POINTonE1_affine *)&BLS12_381_G1, 1);\n                break;\n            default:\n                return 0;\n        }\n    } else {\n        /*\n         * The aggregated signature was infinite, relation between the\n         * hashes and the public keys has to be VERY special...\n         */\n        vec_copy(GT, BLS12_381_Rx.p12, sizeof(GT));\n    }\n\n    conjugate_fp12(GT);\n    mul_fp12(GT, GT, ctx->GT);\n    final_exp(GT, GT);\n\n    /* return GT==1 */\n    return vec_is_equal(GT[0][0], BLS12_381_Rx.p2, sizeof(GT[0][0])) &\n           vec_is_zero(GT[0][1], sizeof(GT) - sizeof(GT[0][0]));\n}\n\nint blst_pairing_finalverify(const PAIRING *ctx, const vec384fp12 GTsig)\n{   return (int)PAIRING_FinalVerify(ctx, GTsig);   }\n\nint blst_fp12_finalverify(const vec384fp12 GT1, const vec384fp12 GT2)\n{\n    vec384fp12 GT;\n\n    vec_copy(GT, GT1, sizeof(GT));\n    conjugate_fp12(GT);\n    mul_fp12(GT, GT, GT2);\n    final_exp(GT, GT);\n\n    /* 
return GT==1 */\n    return (int)(vec_is_equal(GT[0][0], BLS12_381_Rx.p2, sizeof(GT[0][0])) &\n                 vec_is_zero(GT[0][1], sizeof(GT) - sizeof(GT[0][0])));\n}\n\nvoid blst_pairing_raw_aggregate(PAIRING *ctx, const POINTonE2_affine *q,\n                                              const POINTonE1_affine *p)\n{\n    unsigned int n;\n\n    if (vec_is_zero(q, sizeof(*q)) & vec_is_zero(p, sizeof(*p)))\n        return;\n\n    n = ctx->nelems;\n    vec_copy(ctx->Q + n, q, sizeof(*q));\n    vec_copy(ctx->P + n, p, sizeof(*p));\n    if (++n == N_MAX) {\n        if (ctx->ctrl & AGGR_GT_SET) {\n            vec384fp12 GT;\n            miller_loop_n(GT, ctx->Q, ctx->P, n);\n            mul_fp12(ctx->GT, ctx->GT, GT);\n        } else {\n            miller_loop_n(ctx->GT, ctx->Q, ctx->P, n);\n            ctx->ctrl |= AGGR_GT_SET;\n        }\n        n = 0;\n    }\n    ctx->nelems = n;\n}\n\nvec384fp12 *blst_pairing_as_fp12(PAIRING *ctx)\n{\n    PAIRING_Commit(ctx);\n    return (vec384fp12 *)ctx->GT;\n}\n\n/*\n * PAIRING context-free entry points.\n *\n * To perform FastAggregateVerify, aggregate all public keys and\n * signatures with corresponding blst_aggregate_in_g{12}, convert\n * result to affine and call suitable blst_core_verify_pk_in_g{12}\n * or blst_aggregated_in_g{12}...\n */\nBLST_ERROR blst_aggregate_in_g1(POINTonE1 *out, const POINTonE1 *in,\n                                                const unsigned char *zwire)\n{\n    POINTonE1 P[1];\n    BLST_ERROR ret;\n\n    ret = POINTonE1_Deserialize_Z((POINTonE1_affine *)P, zwire);\n\n    if (ret != BLST_SUCCESS)\n        return ret;\n\n    if (vec_is_zero(P, sizeof(POINTonE1_affine))) {\n        if (in == NULL)\n            vec_zero(out, sizeof(*out));\n        return BLST_SUCCESS;\n    }\n\n    vec_copy(P->Z, BLS12_381_Rx.p, sizeof(P->Z));\n\n    if (!POINTonE1_in_G1(P))\n        return BLST_POINT_NOT_IN_GROUP;\n\n    if (in == NULL)\n        vec_copy(out, P, sizeof(P));\n    else\n        
POINTonE1_dadd_affine(out, in, (POINTonE1_affine *)P);\n\n    return BLST_SUCCESS;\n}\n\nBLST_ERROR blst_aggregate_in_g2(POINTonE2 *out, const POINTonE2 *in,\n                                                const unsigned char *zwire)\n{\n    POINTonE2 P[1];\n    BLST_ERROR ret;\n\n    ret = POINTonE2_Deserialize_Z((POINTonE2_affine *)P, zwire);\n\n    if (ret != BLST_SUCCESS)\n        return ret;\n\n    if (vec_is_zero(P, sizeof(POINTonE2_affine))) {\n        if (in == NULL)\n            vec_zero(out, sizeof(*out));\n        return BLST_SUCCESS;\n    }\n\n    vec_copy(P->Z, BLS12_381_Rx.p, sizeof(P->Z));\n\n    if (!POINTonE2_in_G2(P))\n        return BLST_POINT_NOT_IN_GROUP;\n\n    if (in == NULL) {\n        vec_copy(out, P, sizeof(P));\n    } else {\n        POINTonE2_dadd_affine(out, in, (POINTonE2_affine *)P);\n    }\n    return BLST_SUCCESS;\n}\n\nvoid blst_aggregated_in_g1(vec384fp12 ret, const POINTonE1_affine *sig)\n{   miller_loop_n(ret, (const POINTonE2_affine *)&BLS12_381_G2, sig, 1);   }\n\nvoid blst_aggregated_in_g2(vec384fp12 ret, const POINTonE2_affine *sig)\n{   miller_loop_n(ret, sig, (const POINTonE1_affine *)&BLS12_381_G1, 1);   }\n\nBLST_ERROR blst_core_verify_pk_in_g1(const POINTonE1_affine *pk,\n                                     const POINTonE2_affine *signature,\n                                     int hash_or_encode,\n                                     const void *msg, size_t msg_len,\n                                     const void *DST, size_t DST_len,\n                                     const void *aug, size_t aug_len)\n{\n    PAIRING ctx;\n    BLST_ERROR ret;\n\n    ctx.ctrl = AGGR_UNDEFINED | (hash_or_encode ? 
AGGR_HASH_OR_ENCODE : 0);\n    ctx.nelems = 0;\n    ctx.DST = DST;\n    ctx.DST_len = DST_len;\n\n    ret = PAIRING_Aggregate_PK_in_G1(&ctx, pk, 1, signature, 1, NULL, 0,\n                                     msg, msg_len, aug, aug_len);\n    if (ret != BLST_SUCCESS)\n        return ret;\n\n    PAIRING_Commit(&ctx);\n\n    return PAIRING_FinalVerify(&ctx, NULL) ? BLST_SUCCESS : BLST_VERIFY_FAIL;\n}\n\nBLST_ERROR blst_core_verify_pk_in_g2(const POINTonE2_affine *pk,\n                                     const POINTonE1_affine *signature,\n                                     int hash_or_encode,\n                                     const void *msg, size_t msg_len,\n                                     const void *DST, size_t DST_len,\n                                     const void *aug, size_t aug_len)\n{\n    PAIRING ctx;\n    BLST_ERROR ret;\n\n    ctx.ctrl = AGGR_UNDEFINED | (hash_or_encode ? AGGR_HASH_OR_ENCODE : 0);\n    ctx.nelems = 0;\n    ctx.DST = DST;\n    ctx.DST_len = DST_len;\n\n    ret = PAIRING_Aggregate_PK_in_G2(&ctx, pk, 1, signature, 1, NULL, 0,\n                                     msg, msg_len, aug, aug_len);\n    if (ret != BLST_SUCCESS)\n        return ret;\n\n    PAIRING_Commit(&ctx);\n\n    return PAIRING_FinalVerify(&ctx, NULL) ? BLST_SUCCESS : BLST_VERIFY_FAIL;\n}\n"
  },
  {
    "path": "src/asm/add_mod_256-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n($r_ptr,$a_ptr,$b_ptr,$n_ptr) = map(\"x$_\", 0..3);\n\n@mod=map(\"x$_\",(4..7));\n@a=map(\"x$_\",(8..11));\n@b=map(\"x$_\",(12..15));\n@t=map(\"x$_\",(16,17,1..3));\n\n$code.=<<___;\n.text\n\n.globl\tadd_mod_256\n.hidden\tadd_mod_256\n.type\tadd_mod_256,%function\n.align\t5\nadd_mod_256:\n\thint\t#34\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\n\t ldp\t@a[2],@a[3],[$a_ptr,#16]\n\tadds\t@a[0],@a[0],@b[0]\n\t ldp\t@b[2],@b[3],[$b_ptr,#16]\n\tadcs\t@a[1],@a[1],@b[1]\n\t ldp\t@mod[0],@mod[1],[$n_ptr]\n\tadcs\t@a[2],@a[2],@b[2]\n\t ldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tadcs\t@a[3],@a[3],@b[3]\n\tadc\t@t[4],xzr,xzr\n\n\tsubs\t@t[0],@a[0],@mod[0]\n\tsbcs\t@t[1],@a[1],@mod[1]\n\tsbcs\t@t[2],@a[2],@mod[2]\n\tsbcs\t@t[3],@a[3],@mod[3]\n\tsbcs\txzr,@t[4],xzr\n\n\tcsel\t@a[0],@a[0],@t[0],lo\n\tcsel\t@a[1],@a[1],@t[1],lo\n\tcsel\t@a[2],@a[2],@t[2],lo\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcsel\t@a[3],@a[3],@t[3],lo\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\tadd_mod_256,.-add_mod_256\n\n.globl\tmul_by_3_mod_256\n.hidden\tmul_by_3_mod_256\n.type\tmul_by_3_mod_256,%function\n.align\t5\nmul_by_3_mod_256:\n\thint\t#34\n\tldp\t@b[0],@b[1],[$a_ptr]\n\tldp\t@b[2],@b[3],[$a_ptr,#16]\n\n\tadds\t@a[0],@b[0],@b[0]\n\t ldp\t@mod[0],@mod[1],[$b_ptr]\n\tadcs\t@a[1],@b[1],@b[1]\n\t 
ldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tadcs\t@a[2],@b[2],@b[2]\n\tadcs\t@a[3],@b[3],@b[3]\n\tadc\t@t[4],xzr,xzr\n\n\tsubs\t@t[0],@a[0],@mod[0]\n\tsbcs\t@t[1],@a[1],@mod[1]\n\tsbcs\t@t[2],@a[2],@mod[2]\n\tsbcs\t@t[3],@a[3],@mod[3]\n\tsbcs\txzr,@t[4],xzr\n\n\tcsel\t@a[0],@a[0],@t[0],lo\n\tcsel\t@a[1],@a[1],@t[1],lo\n\tcsel\t@a[2],@a[2],@t[2],lo\n\tcsel\t@a[3],@a[3],@t[3],lo\n\n\tadds\t@a[0],@a[0],@b[0]\n\tadcs\t@a[1],@a[1],@b[1]\n\tadcs\t@a[2],@a[2],@b[2]\n\tadcs\t@a[3],@a[3],@b[3]\n\tadc\t@t[4],xzr,xzr\n\n\tsubs\t@t[0],@a[0],@mod[0]\n\tsbcs\t@t[1],@a[1],@mod[1]\n\tsbcs\t@t[2],@a[2],@mod[2]\n\tsbcs\t@t[3],@a[3],@mod[3]\n\tsbcs\txzr,@t[4],xzr\n\n\tcsel\t@a[0],@a[0],@t[0],lo\n\tcsel\t@a[1],@a[1],@t[1],lo\n\tcsel\t@a[2],@a[2],@t[2],lo\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcsel\t@a[3],@a[3],@t[3],lo\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\tmul_by_3_mod_256,.-mul_by_3_mod_256\n\n.globl\tlshift_mod_256\n.hidden\tlshift_mod_256\n.type\tlshift_mod_256,%function\n.align\t5\nlshift_mod_256:\n\thint\t#34\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\n.Loop_lshift_mod_256:\n\tadds\t@a[0],@a[0],@a[0]\n\tsub\t$b_ptr,$b_ptr,#1\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadc\t@t[4],xzr,xzr\n\n\tsubs\t@b[0],@a[0],@mod[0]\n\tsbcs\t@b[1],@a[1],@mod[1]\n\tsbcs\t@b[2],@a[2],@mod[2]\n\tsbcs\t@b[3],@a[3],@mod[3]\n\tsbcs\txzr,@t[4],xzr\n\n\tcsel\t@a[0],@a[0],@b[0],lo\n\tcsel\t@a[1],@a[1],@b[1],lo\n\tcsel\t@a[2],@a[2],@b[2],lo\n\tcsel\t@a[3],@a[3],@b[3],lo\n\n\tcbnz\t$b_ptr,.Loop_lshift_mod_256\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\tlshift_mod_256,.-lshift_mod_256\n\n.globl\trshift_mod_256\n.hidden\trshift_mod_256\n.type\trshift_mod_256,%function\n.align\t5\nrshift_mod_256:\n\thint\t#34\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\n.
Loop_rshift:\n\tadds\t@b[0],@a[0],@mod[0]\n\tsub\t$b_ptr,$b_ptr,#1\n\tadcs\t@b[1],@a[1],@mod[1]\n\tadcs\t@b[2],@a[2],@mod[2]\n\tadcs\t@b[3],@a[3],@mod[3]\n\tadc\t@t[4],xzr,xzr\n\ttst\t@a[0],#1\n\n\tcsel\t@b[0],@b[0],@a[0],ne\n\tcsel\t@b[1],@b[1],@a[1],ne\n\tcsel\t@b[2],@b[2],@a[2],ne\n\tcsel\t@b[3],@b[3],@a[3],ne\n\tcsel\t@t[4],@t[4],xzr,ne\n\n\textr\t@a[0],@b[1],@b[0],#1\n\textr\t@a[1],@b[2],@b[1],#1\n\textr\t@a[2],@b[3],@b[2],#1\n\textr\t@a[3],@t[4],@b[3],#1\n\n\tcbnz\t$b_ptr,.Loop_rshift\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\trshift_mod_256,.-rshift_mod_256\n\n.globl\tcneg_mod_256\n.hidden\tcneg_mod_256\n.type\tcneg_mod_256,%function\n.align\t5\ncneg_mod_256:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\n\t ldp\t@a[2],@a[3],[$a_ptr,#16]\n\tsubs\t@b[0],@mod[0],@a[0]\n\t ldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\t orr\t@mod[0],@a[0],@a[1]\n\tsbcs\t@b[1],@mod[1],@a[1]\n\t orr\t@mod[1],@a[2],@a[3]\n\tsbcs\t@b[2],@mod[2],@a[2]\n\t orr\t@t[4],@mod[0],@mod[1]\n\tsbc\t@b[3],@mod[3],@a[3]\n\n\tcmp\t@t[4],#0\n\tcsetm\t@t[4],ne\n\tands\t$b_ptr,$b_ptr,@t[4]\n\n\tcsel\t@a[0],@a[0],@b[0],eq\n\tcsel\t@a[1],@a[1],@b[1],eq\n\tcsel\t@a[2],@a[2],@b[2],eq\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcsel\t@a[3],@a[3],@b[3],eq\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\tcneg_mod_256,.-cneg_mod_256\n\n.globl\tsub_mod_256\n.hidden\tsub_mod_256\n.type\tsub_mod_256,%function\n.align\t5\nsub_mod_256:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\n\t ldp\t@a[2],@a[3],[$a_ptr,#16]\n\tsubs\t@a[0],@a[0],@b[0]\n\t ldp\t@b[2],@b[3],[$b_ptr,#16]\n\tsbcs\t@a[1],@a[1],@b[1]\n\t ldp\t@mod[0],@mod[1],[$n_ptr]\n\tsbcs\t@a[2],@a[2],@b[2]\n\t ldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tsbcs\t@a[3],@a[3],@b[3]\n\tsbc\t@t[4],xzr,xzr\n\n\t and\t@mod[0],@mod[0],@t[4]\n\t and\t@mod[1],@mod[1],@t[4]\n\tadds\t@a[0],@a[0],@mod[0]\n\t and\t@mod[2],@mod[2],@t[4]\n\tadcs\t@a[1],@a[1],@mod[1]\n\t 
and\t@mod[3],@mod[3],@t[4]\n\tadcs\t@a[2],@a[2],@mod[2]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tadc\t@a[3],@a[3],@mod[3]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tret\n.size\tsub_mod_256,.-sub_mod_256\n\n.globl\tcheck_mod_256\n.hidden\tcheck_mod_256\n.type\tcheck_mod_256,%function\n.align\t5\ncheck_mod_256:\n\tldp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@mod[0],@mod[1],[$a_ptr]\n\tldp\t@mod[2],@mod[3],[$a_ptr,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\t@a[0],@a[0]\n\trev\t@a[1],@a[1]\n\trev\t@a[2],@a[2]\n\trev\t@a[3],@a[3]\n#endif\n\n\tsubs\txzr,@a[0],@mod[0]\n\tsbcs\txzr,@a[1],@mod[1]\n\torr\t@a[0],@a[0],@a[1]\n\tsbcs\txzr,@a[2],@mod[2]\n\torr\t@a[0],@a[0],@a[2]\n\tsbcs\txzr,@a[3],@mod[3]\n\torr\t@a[0],@a[0],@a[3]\n\tsbc\t$a_ptr,xzr,xzr\n\n\tcmp\t@a[0],#0\n\tmov\tx0,#1\n\tcsel\tx0,x0,xzr,ne\n\tand\tx0,x0,$a_ptr\n\n\tret\n.size\tcheck_mod_256,.-check_mod_256\n\n.globl\tadd_n_check_mod_256\n.hidden\tadd_n_check_mod_256\n.type\tadd_n_check_mod_256,%function\n.align\t5\nadd_n_check_mod_256:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@b[2],@b[3],[$b_ptr,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\t@a[0],@a[0]\n\trev\t@b[0],@b[0]\n\trev\t@a[1],@a[1]\n\trev\t@b[1],@b[1]\n\trev\t@a[2],@a[2]\n\trev\t@b[2],@b[2]\n\trev\t@a[3],@a[3]\n\trev\t@b[3],@b[3]\n#endif\n\n\tadds\t@a[0],@a[0],@b[0]\n\t ldp\t@mod[0],@mod[1],[$n_ptr]\n\tadcs\t@a[1],@a[1],@b[1]\n\t ldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tadcs\t@a[2],@a[2],@b[2]\n\tadcs\t@a[3],@a[3],@b[3]\n\tadc\t@t[4],xzr,xzr\n\n\tsubs\t@t[0],@a[0],@mod[0]\n\tsbcs\t@t[1],@a[1],@mod[1]\n\tsbcs\t@t[2],@a[2],@mod[2]\n\tsbcs\t@t[3],@a[3],@mod[3]\n\tsbcs\txzr,@t[4],xzr\n\n\tcsel\t@a[0],@a[0],@t[0],lo\n\tcsel\t@a[1],@a[1],@t[1],lo\n\tcsel\t@a[2],@a[2],@t[2],lo\n\tcsel\t@a[3],@a[3],@t[3],lo\n\n\torr\t@t[0], @a[0], @a[1]\n\torr\t@t[1], @a[2], @a[3]\n\torr\t@t[0], @t[0], 
@t[1]\n\n#ifdef\t__AARCH64EB__\n\trev\t@a[0],@a[0]\n\trev\t@a[1],@a[1]\n\trev\t@a[2],@a[2]\n\trev\t@a[3],@a[3]\n#endif\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tmov\t@t[1], #1\n\tcmp\t@t[0], #0\n\tcsel\tx0, @t[1], xzr, ne\n\n\tret\n.size\tadd_n_check_mod_256,.-add_n_check_mod_256\n\n.globl\tsub_n_check_mod_256\n.hidden\tsub_n_check_mod_256\n.type\tsub_n_check_mod_256,%function\n.align\t5\nsub_n_check_mod_256:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@b[2],@b[3],[$b_ptr,#16]\n\n#ifdef\t__AARCH64EB__\n\trev\t@a[0],@a[0]\n\trev\t@b[0],@b[0]\n\trev\t@a[1],@a[1]\n\trev\t@b[1],@b[1]\n\trev\t@a[2],@a[2]\n\trev\t@b[2],@b[2]\n\trev\t@a[3],@a[3]\n\trev\t@b[3],@b[3]\n#endif\n\n\tsubs\t@a[0],@a[0],@b[0]\n\tsbcs\t@a[1],@a[1],@b[1]\n\t ldp\t@mod[0],@mod[1],[$n_ptr]\n\tsbcs\t@a[2],@a[2],@b[2]\n\t ldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tsbcs\t@a[3],@a[3],@b[3]\n\tsbc\t@t[4],xzr,xzr\n\n\t and\t@mod[0],@mod[0],@t[4]\n\t and\t@mod[1],@mod[1],@t[4]\n\tadds\t@a[0],@a[0],@mod[0]\n\t and\t@mod[2],@mod[2],@t[4]\n\tadcs\t@a[1],@a[1],@mod[1]\n\t and\t@mod[3],@mod[3],@t[4]\n\tadcs\t@a[2],@a[2],@mod[2]\n\tadc\t@a[3],@a[3],@mod[3]\n\n\torr\t@t[0], @a[0], @a[1]\n\torr\t@t[1], @a[2], @a[3]\n\torr\t@t[0], @t[0], @t[1]\n\n#ifdef\t__AARCH64EB__\n\trev\t@a[0],@a[0]\n\trev\t@a[1],@a[1]\n\trev\t@a[2],@a[2]\n\trev\t@a[3],@a[3]\n#endif\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tmov\t@t[1], #1\n\tcmp\t@t[0], #0\n\tcsel\tx0, @t[1], xzr, ne\n\n\tret\n.size\tsub_n_check_mod_256,.-sub_n_check_mod_256\n___\n\nprint $code;\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/add_mod_256-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\");\n$b_ptr = \"%rbx\";\n\n{ ############################################################## 256 bits add\nmy @acc=map(\"%r$_\",(8..11, \"ax\", \"si\", \"bx\", \"bp\", 12));\n\n$code.=<<___;\n.text\n\n.globl\tadd_mod_256\n.hidden\tadd_mod_256\n.type\tadd_mod_256,\\@function,4,\"unwind\"\n.align\t32\nadd_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n.Loaded_a_add_mod_256:\n\tadd\t8*0($b_org), @acc[0]\n\tadc\t8*1($b_org), @acc[1]\n\t mov\t@acc[0], @acc[4]\n\tadc\t8*2($b_org), @acc[2]\n\t mov\t@acc[1], @acc[5]\n\tadc\t8*3($b_org), @acc[3]\n\tsbb\t$b_org, $b_org\n\n\t mov\t@acc[2], @acc[6]\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], @acc[7]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[4], @acc[0]\n\tcmovc\t@acc[5], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t@acc[6], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[7], 
@acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tadd_mod_256,.-add_mod_256\n\n########################################################################\n.globl\tmul_by_3_mod_256\n.hidden\tmul_by_3_mod_256\n.type\tmul_by_3_mod_256,\\@function,3,\"unwind\"\n.align\t32\nmul_by_3_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n.cfi_end_prologue\n\n\tmov\t$b_org,$n_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t$a_ptr,$b_org\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tcall\t__lshift_mod_256\n\tmov\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tjmp\t.Loaded_a_add_mod_256\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_3_mod_256,.-mul_by_3_mod_256\n\n.type\t__lshift_mod_256,\\@abi-omnipotent\n.align\t32\n__lshift_mod_256:\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\t mov\t@acc[0], @acc[4]\n\tadc\t@acc[2], @acc[2]\n\t mov\t@acc[1], @acc[5]\n\tadc\t@acc[3], @acc[3]\n\tsbb\t@acc[8], @acc[8]\n\n\t mov\t@acc[2], @acc[6]\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], @acc[7]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t\\$0, @acc[8]\n\n\tcmovc\t@acc[4], @acc[0]\n\tcmovc\t@acc[5], @acc[1]\n\tcmovc\t@acc[6], @acc[2]\n\tcmovc\t@acc[7], @acc[3]\n\n\tret\t# 
__SGX_LVI_HARDENING_CLOBBER__=@acc[4]\n.size\t__lshift_mod_256,.-__lshift_mod_256\n\n########################################################################\n.globl\tlshift_mod_256\n.hidden\tlshift_mod_256\n.type\tlshift_mod_256,\\@function,4,\"unwind\"\n.align\t32\nlshift_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n.Loop_lshift_mod_256:\n\tcall\t__lshift_mod_256\n\tdec\t%edx\n\tjnz\t.Loop_lshift_mod_256\n\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tlshift_mod_256,.-lshift_mod_256\n\n########################################################################\n.globl\trshift_mod_256\n.hidden\trshift_mod_256\n.type\trshift_mod_256,\\@function,4,\"unwind\"\n.align\t32\nrshift_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[7]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n.Loop_rshift_mod_256:\n\tmov\t@acc[7], @acc[0]\n\tand\t\\$1, @acc[7]\n\tmov\t8*0($n_ptr), @acc[4]\n\tneg\t@acc[7]\n\tmov\t8*1($n_ptr), @acc[5]\n\tmov\t8*2($n_ptr), @acc[6]\n\n\tand\t@acc[7], @acc[4]\n\tand\t@acc[7], @acc[5]\n\tand\t@acc[7], @acc[6]\n\tand\t8*3($n_ptr), @acc[7]\n\n\tadd\t@acc[4], @acc[0]\n\tadc\t@acc[5], @acc[1]\n\tadc\t@acc[6], @acc[2]\n\tadc\t@acc[7], @acc[3]\n\tsbb\t@acc[4], @acc[4]\n\n\tshr\t\\$1, 
@acc[0]\n\tmov\t@acc[1], @acc[7]\n\tshr\t\\$1, @acc[1]\n\tmov\t@acc[2], @acc[6]\n\tshr\t\\$1, @acc[2]\n\tmov\t@acc[3], @acc[5]\n\tshr\t\\$1, @acc[3]\n\n\tshl\t\\$63, @acc[7]\n\tshl\t\\$63, @acc[6]\n\tor\t@acc[0], @acc[7]\n\tshl\t\\$63, @acc[5]\n\tor\t@acc[6], @acc[1]\n\tshl\t\\$63, @acc[4]\n\tor\t@acc[5], @acc[2]\n\tor\t@acc[4], @acc[3]\n\n\tdec\t%edx\n\tjnz\t.Loop_rshift_mod_256\n\n\tmov\t@acc[7], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\trshift_mod_256,.-rshift_mod_256\n\n########################################################################\n.globl\tcneg_mod_256\n.hidden\tcneg_mod_256\n.type\tcneg_mod_256,\\@function,4,\"unwind\"\n.align\t32\ncneg_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[8]\t# load a[0:3]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t@acc[8], @acc[0]\n\tmov\t8*3($a_ptr), @acc[3]\n\tor\t@acc[1], @acc[8]\n\tor\t@acc[2], @acc[8]\n\tor\t@acc[3], @acc[8]\n\tmov\t\\$-1, @acc[7]\n\n\tmov\t8*0($n_ptr), @acc[4]\t# load n[0:3]\n\tcmovnz\t@acc[7], @acc[8]\t# mask = a[0:3] ? -1 : 0\n\tmov\t8*1($n_ptr), @acc[5]\n\tmov\t8*2($n_ptr), @acc[6]\n\tand\t@acc[8], @acc[4]\t# n[0:3] &= mask\n\tmov\t8*3($n_ptr), @acc[7]\n\tand\t@acc[8], @acc[5]\n\tand\t@acc[8], @acc[6]\n\tand\t@acc[8], @acc[7]\n\n\tsub\t@acc[0], @acc[4]\t# a[0:3] ? n[0:3]-a[0:3] : 0-0\n\tsbb\t@acc[1], @acc[5]\n\tsbb\t@acc[2], @acc[6]\n\tsbb\t@acc[3], @acc[7]\n\n\tor\t$b_org, $b_org\t\t# check condition flag\n\n\tcmovz\t@acc[0], @acc[4]\t# flag ? 
n[0:3]-a[0:3] : a[0:3]\n\tcmovz\t@acc[1], @acc[5]\n\tmov\t@acc[4], 8*0($r_ptr)\n\tcmovz\t@acc[2], @acc[6]\n\tmov\t@acc[5], 8*1($r_ptr)\n\tcmovz\t@acc[3], @acc[7]\n\tmov\t@acc[6], 8*2($r_ptr)\n\tmov\t@acc[7], 8*3($r_ptr)\n\n\tmov\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tcneg_mod_256,.-cneg_mod_256\n\n########################################################################\n.globl\tsub_mod_256\n.hidden\tsub_mod_256\n.type\tsub_mod_256,\\@function,4,\"unwind\"\n.align\t32\nsub_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tsub\t8*0($b_org), @acc[0]\n\t mov\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($b_org), @acc[1]\n\t mov\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($b_org), @acc[2]\n\t mov\t8*2($n_ptr), @acc[6]\n\tsbb\t8*3($b_org), @acc[3]\n\t mov\t8*3($n_ptr), @acc[7]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[4]\n\tand\t$b_org, @acc[5]\n\tand\t$b_org, @acc[6]\n\tand\t$b_org, @acc[7]\n\n\tadd\t@acc[4], @acc[0]\n\tadc\t@acc[5], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[6], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[7], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 
8*3($r_ptr)\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsub_mod_256,.-sub_mod_256\n\n########################################################################\n.globl\tcheck_mod_256\n.hidden\tcheck_mod_256\n.type\tcheck_mod_256,\\@function,2,\"unwind\"\n.align\t32\ncheck_mod_256:\n.cfi_startproc\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($r_ptr), %rax\n\tmov\t8*1($r_ptr), @acc[1]\n\tmov\t8*2($r_ptr), @acc[2]\n\tmov\t8*3($r_ptr), @acc[3]\n\n\tmov\t%rax, @acc[0]\t\t# see if it's zero\n\tor\t@acc[1], %rax\n\tor\t@acc[2], %rax\n\tor\t@acc[3], %rax\n\n\tsub\t8*0($a_ptr), @acc[0]\t# does subtracting modulus borrow?\n\tsbb\t8*1($a_ptr), @acc[1]\n\tsbb\t8*2($a_ptr), @acc[2]\n\tsbb\t8*3($a_ptr), @acc[3]\n\tsbb\t$a_ptr, $a_ptr\n\n\tmov\t\\$1, %rdx\n\tcmp\t\\$0, %rax\n\tcmovne\t%rdx, %rax\n\tand\t$a_ptr, %rax\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tcheck_mod_256,.-check_mod_256\n\n########################################################################\n.globl\tadd_n_check_mod_256\n.hidden\tadd_n_check_mod_256\n.type\tadd_n_check_mod_256,\\@function,4,\"unwind\"\n.align\t32\nadd_n_check_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tadd\t8*0($b_org), @acc[0]\n\tadc\t8*1($b_org), @acc[1]\n\t mov\t@acc[0], @acc[4]\n\tadc\t8*2($b_org), @acc[2]\n\t mov\t@acc[1], @acc[5]\n\tadc\t8*3($b_org), @acc[3]\n\tsbb\t$b_org, $b_org\n\n\t mov\t@acc[2], @acc[6]\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], @acc[7]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[4], 
@acc[0]\n\tcmovc\t@acc[5], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t@acc[6], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[7], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[3], @acc[2]\n\tor\t@acc[2], @acc[0]\n\tmov\t\\$1, %rax\n\tcmovz\t@acc[0], %rax\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tadd_n_check_mod_256,.-add_n_check_mod_256\n\n########################################################################\n.globl\tsub_n_check_mod_256\n.hidden\tsub_n_check_mod_256\n.type\tsub_n_check_mod_256,\\@function,4,\"unwind\"\n.align\t32\nsub_n_check_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tsub\t8*0($b_org), @acc[0]\n\t mov\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($b_org), @acc[1]\n\t mov\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($b_org), @acc[2]\n\t mov\t8*2($n_ptr), @acc[6]\n\tsbb\t8*3($b_org), @acc[3]\n\t mov\t8*3($n_ptr), @acc[7]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[4]\n\tand\t$b_org, @acc[5]\n\tand\t$b_org, @acc[6]\n\tand\t$b_org, @acc[7]\n\n\tadd\t@acc[4], @acc[0]\n\tadc\t@acc[5], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[6], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[7], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[3], @acc[2]\n\tor\t@acc[2], @acc[0]\n\tmov\t\\$1, %rax\n\tcmovz\t@acc[0], 
%rax\n\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsub_n_check_mod_256,.-sub_n_check_mod_256\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/add_mod_384-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n($r_ptr,$a_ptr,$b_ptr,$n_ptr) = map(\"x$_\", 0..3);\n\n@mod=map(\"x$_\",(4..9));\n@a=map(\"x$_\",(10..15));\n@b=map(\"x$_\",(16,17,19..22));\n$carry=$n_ptr;\n\n$code.=<<___;\n.text\n\n.globl\tadd_mod_384\n.hidden\tadd_mod_384\n.type\tadd_mod_384,%function\n.align\t5\nadd_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__add_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tadd_mod_384,.-add_mod_384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@b[2],@b[3],[$b_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tldp\t@b[4],@b[5],[$b_ptr,#32]\n\n__add_mod_384_ab_are_loaded:\n\tadds\t@a[0],@a[0],@b[0]\n\tadcs\t@a[1],@a[1],@b[1]\n\tadcs\t@a[2],@a[2],@b[2]\n\tadcs\t@a[3],@a[3],@b[3]\n\tadcs\t@a[4],@a[4],@b[4]\n\tadcs\t@a[5],@a[5],@b[5]\n\tadc\t$carry,xzr,xzr\n\n\tsubs\t@b[0],@a[0],@mod[0]\n\tsbcs\t@b[1],
@a[1],@mod[1]\n\tsbcs\t@b[2],@a[2],@mod[2]\n\tsbcs\t@b[3],@a[3],@mod[3]\n\tsbcs\t@b[4],@a[4],@mod[4]\n\tsbcs\t@b[5],@a[5],@mod[5]\n\tsbcs\txzr,$carry,xzr\n\n\tcsel\t@a[0],@a[0],@b[0],lo\n\tcsel\t@a[1],@a[1],@b[1],lo\n\tcsel\t@a[2],@a[2],@b[2],lo\n\tcsel\t@a[3],@a[3],@b[3],lo\n\tcsel\t@a[4],@a[4],@b[4],lo\n\tcsel\t@a[5],@a[5],@b[5],lo\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.globl\tadd_mod_384x\n.hidden\tadd_mod_384x\n.type\tadd_mod_384x,%function\n.align\t5\nadd_mod_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__add_mod_384\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcadd\t$a_ptr,$a_ptr,#48\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tcadd\t$b_ptr,$b_ptr,#48\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tbl\t__add_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tadd_mod_384x,.-add_mod_384x\n\n.globl\trshift_mod_384\n.hidden\trshift_mod_384\n.type\trshift_mod_384,%function\n.align\t5\nrshift_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n.Loop_rshift_mod_384:\n\tsub\t$b_ptr,$b_ptr,#1\n\tbl\t__rshift_mod_384\n\tcbnz\t$b_ptr,.Loop_rshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_p
tr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\trshift_mod_384,.-rshift_mod_384\n\n.type\t__rshift_mod_384,%function\n.align\t5\n__rshift_mod_384:\n\tsbfx\t@b[5],@a[0],#0,#1\n\t and\t@b[0],@b[5],@mod[0]\n\t and\t@b[1],@b[5],@mod[1]\n\tadds\t@a[0],@a[0],@b[0]\n\t and\t@b[2],@b[5],@mod[2]\n\tadcs\t@a[1],@a[1],@b[1]\n\t and\t@b[3],@b[5],@mod[3]\n\tadcs\t@a[2],@a[2],@b[2]\n\t and\t@b[4],@b[5],@mod[4]\n\tadcs\t@a[3],@a[3],@b[3]\n\t and\t@b[5],@b[5],@mod[5]\n\tadcs\t@a[4],@a[4],@b[4]\n\t extr\t@a[0],@a[1],@a[0],#1\t// a[0:5] >>= 1\n\tadcs\t@a[5],@a[5],@b[5]\n\t extr\t@a[1],@a[2],@a[1],#1\n\tadc\t@b[5],xzr,xzr\n\t extr\t@a[2],@a[3],@a[2],#1\n\t extr\t@a[3],@a[4],@a[3],#1\n\t extr\t@a[4],@a[5],@a[4],#1\n\t extr\t@a[5],@b[5],@a[5],#1\n\tret\n.size\t__rshift_mod_384,.-__rshift_mod_384\n\n.globl\tdiv_by_2_mod_384\n.hidden\tdiv_by_2_mod_384\n.type\tdiv_by_2_mod_384,%function\n.align\t5\ndiv_by_2_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__rshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tdiv_by_2_mod_384,.-div_by_2_mod_384\n\n.globl\tlshift_mod_384\n.hidden\tlshift_mod_384\n.type\tlshift_mod_384,%function\n.align\t5\nlshift_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4
*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n.Loop_lshift_mod_384:\n\tsub\t$b_ptr,$b_ptr,#1\n\tbl\t__lshift_mod_384\n\tcbnz\t$b_ptr,.Loop_lshift_mod_384\n\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tlshift_mod_384,.-lshift_mod_384\n\n.type\t__lshift_mod_384,%function\n.align\t5\n__lshift_mod_384:\n\tadds\t@a[0],@a[0],@a[0]\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t$carry,xzr,xzr\n\n\tsubs\t@b[0],@a[0],@mod[0]\n\tsbcs\t@b[1],@a[1],@mod[1]\n\tsbcs\t@b[2],@a[2],@mod[2]\n\tsbcs\t@b[3],@a[3],@mod[3]\n\tsbcs\t@b[4],@a[4],@mod[4]\n\tsbcs\t@b[5],@a[5],@mod[5]\n\tsbcs\txzr,$carry,xzr\n\n\tcsel\t@a[0],@a[0],@b[0],lo\n\tcsel\t@a[1],@a[1],@b[1],lo\n\tcsel\t@a[2],@a[2],@b[2],lo\n\tcsel\t@a[3],@a[3],@b[3],lo\n\tcsel\t@a[4],@a[4],@b[4],lo\n\tcsel\t@a[5],@a[5],@b[5],lo\n\n\tret\n.size\t__lshift_mod_384,.-__lshift_mod_384\n\n.globl\tmul_by_3_mod_384\n.hidden\tmul_by_3_mod_384\n.type\tmul_by_3_mod_384,%function\n.align\t5\nmul_by_3_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\t@b[0],@b[1],[$a_ptr]\n\tldp\t@b[2],@b[3],[$a_ptr,#16]\n\tldp\t@b[4],@b[5],[$a_ptr,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\
tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_by_3_mod_384,.-mul_by_3_mod_384\n\n.globl\tmul_by_8_mod_384\n.hidden\tmul_by_8_mod_384\n.type\tmul_by_8_mod_384,%function\n.align\t5\nmul_by_8_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_by_8_mod_384,.-mul_by_8_mod_384\n\n.globl\tmul_by_3_mod_384x\n.hidden\tmul_by_3_mod_384x\n.type\tmul_by_3_mod_384x,%function\n.align\t5\nmul_by_3_mod_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__lshift_mod_384\n\n\tldp\t@b[0],@b[1],[$a_ptr]\n\tldp\t@b[2],@b[3],[$a_ptr,#16]\n\tldp\t@b[4],@b[5],[$a_ptr,#32]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[0],@a[1],[$a_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@a[2],@a[3],[$a_ptr,#64]\
n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\tldp\t@a[4],@a[5],[$a_ptr,#80]\n\n\tbl\t__lshift_mod_384\n\n\tldp\t@b[0],@b[1],[$a_ptr,#48]\n\tldp\t@b[2],@b[3],[$a_ptr,#64]\n\tldp\t@b[4],@b[5],[$a_ptr,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_by_3_mod_384x,.-mul_by_3_mod_384x\n\n.globl\tmul_by_8_mod_384x\n.hidden\tmul_by_8_mod_384x\n.type\tmul_by_8_mod_384x,%function\n.align\t5\nmul_by_8_mod_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[0],@a[1],[$a_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@a[2],@a[3],[$a_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\tldp\t@a[4],@a[5],[$a_ptr,#80]\n\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tbl\t__lshift_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_by_8_mod_384x,.-mul_by_8_mod_384x\n\n.globl\tcneg_mod_384\n.hidden\tcneg_mod_384\n.type\tcneg_mod_384,%function\n.align\t5\ncneg_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__
]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\n\tsubs\t@b[0],@mod[0],@a[0]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\t orr\t$carry,@a[0],@a[1]\n\tsbcs\t@b[1],@mod[1],@a[1]\n\t orr\t$carry,$carry,@a[2]\n\tsbcs\t@b[2],@mod[2],@a[2]\n\t orr\t$carry,$carry,@a[3]\n\tsbcs\t@b[3],@mod[3],@a[3]\n\t orr\t$carry,$carry,@a[4]\n\tsbcs\t@b[4],@mod[4],@a[4]\n\t orr\t$carry,$carry,@a[5]\n\tsbc\t@b[5],@mod[5],@a[5]\n\n\tcmp\t$carry,#0\n\tcsetm\t$carry,ne\n\tands\t$b_ptr,$b_ptr,$carry\n\n\tcsel\t@a[0],@a[0],@b[0],eq\n\tcsel\t@a[1],@a[1],@b[1],eq\n\tcsel\t@a[2],@a[2],@b[2],eq\n\tcsel\t@a[3],@a[3],@b[3],eq\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcsel\t@a[4],@a[4],@b[4],eq\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tcsel\t@a[5],@a[5],@b[5],eq\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tcneg_mod_384,.-cneg_mod_384\n\n.globl\tsub_mod_384\n.hidden\tsub_mod_384\n.type\tsub_mod_384,%function\n.align\t5\nsub_mod_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsub_mod_384,.-sub_mod_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@b[2],@b[3],[$b_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tldp\t@b[4]
,@b[5],[$b_ptr,#32]\n\n\tsubs\t@a[0],@a[0],@b[0]\n\tsbcs\t@a[1],@a[1],@b[1]\n\tsbcs\t@a[2],@a[2],@b[2]\n\tsbcs\t@a[3],@a[3],@b[3]\n\tsbcs\t@a[4],@a[4],@b[4]\n\tsbcs\t@a[5],@a[5],@b[5]\n\tsbc\t$carry,xzr,xzr\n\n\t and\t@b[0],@mod[0],$carry\n\t and\t@b[1],@mod[1],$carry\n\tadds\t@a[0],@a[0],@b[0]\n\t and\t@b[2],@mod[2],$carry\n\tadcs\t@a[1],@a[1],@b[1]\n\t and\t@b[3],@mod[3],$carry\n\tadcs\t@a[2],@a[2],@b[2]\n\t and\t@b[4],@mod[4],$carry\n\tadcs\t@a[3],@a[3],@b[3]\n\t and\t@b[5],@mod[5],$carry\n\tadcs\t@a[4],@a[4],@b[4]\n\tadc\t@a[5],@a[5],@b[5]\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tsub_mod_384x\n.hidden\tsub_mod_384x\n.type\tsub_mod_384x,%function\n.align\t5\nsub_mod_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__sub_mod_384\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcadd\t$a_ptr,$a_ptr,#48\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tcadd\t$b_ptr,$b_ptr,#48\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tbl\t__sub_mod_384\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsub_mod_384x,.-sub_mod_384x\n\n.globl\tmul_by_1_plus_i_mod_384x\n.hidden\tmul_by_1_plus_i_mod_384x\n.type\tmul_by_1_plus_i_mod_384x,%function\n.align\t5\nmul_by_1_plus_i_mod_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\tcadd\t$b_ptr,$a_ptr,#48\n\n\tbl\t__sub_mod_384\t\t\t// a->re - 
a->im\n\n\tldp\t@b[0],@b[1],[$a_ptr]\n\tldp\t@b[2],@b[3],[$a_ptr,#16]\n\tldp\t@b[4],@b[5],[$a_ptr,#32]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[0],@a[1],[$a_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@a[2],@a[3],[$a_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\tldp\t@a[4],@a[5],[$a_ptr,#80]\n\n\tbl\t__add_mod_384_ab_are_loaded\t// a->re + a->im\n\tldr\tc30,[csp,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x\n\n.globl\tsgn0_pty_mod_384\n.hidden\tsgn0_pty_mod_384\n.type\tsgn0_pty_mod_384,%function\n.align\t5\nsgn0_pty_mod_384:\n\thint\t#34\n\tldp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$a_ptr]\n\tldp\t@mod[2],@mod[3],[$a_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$a_ptr,#32]\n\n\tand\t$r_ptr,@a[0],#1\n\tadds\t@a[0],@a[0],@a[0]\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t$carry,xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t$carry,$carry,xzr\n\n\tmvn\t$carry,$carry\n\tand\t$carry,$carry,#2\n\torr\t$r_ptr,$r_ptr,$carry\n\n\tret\n.size\tsgn0_pty_mod_384,.-sgn0_pty_mod_384\n\n.globl\tsgn0_pty_mod_384x\n.hidden\tsgn0_pty_mod_384x\n.type\tsgn0_pty_mod_384x,%function\n.align\t5\nsgn0_pty_mod_384x:\n\thint\t#34\n\tldp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[2],@a[3],[$r_ptr,#16]\n\tldp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$a_ptr]\n\tldp\t@mod[2],@mod[3],[$a_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$a_ptr,#32]\n\n\tand\t$b_ptr,@a[0],#1\n\t 
orr\t$n_ptr,@a[0],@a[1]\n\tadds\t@a[0],@a[0],@a[0]\n\t orr\t$n_ptr,$n_ptr,@a[2]\n\tadcs\t@a[1],@a[1],@a[1]\n\t orr\t$n_ptr,$n_ptr,@a[3]\n\tadcs\t@a[2],@a[2],@a[2]\n\t orr\t$n_ptr,$n_ptr,@a[4]\n\tadcs\t@a[3],@a[3],@a[3]\n\t orr\t$n_ptr,$n_ptr,@a[5]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t@b[0],xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t@b[0],@b[0],xzr\n\n\tldp\t@a[0],@a[1],[$r_ptr,#48]\n\tldp\t@a[2],@a[3],[$r_ptr,#64]\n\tldp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tmvn\t@b[0],@b[0]\n\tand\t@b[0],@b[0],#2\n\torr\t$b_ptr,$b_ptr,@b[0]\n\n\tand\t$r_ptr,@a[0],#1\n\t orr\t$a_ptr,@a[0],@a[1]\n\tadds\t@a[0],@a[0],@a[0]\n\t orr\t$a_ptr,$a_ptr,@a[2]\n\tadcs\t@a[1],@a[1],@a[1]\n\t orr\t$a_ptr,$a_ptr,@a[3]\n\tadcs\t@a[2],@a[2],@a[2]\n\t orr\t$a_ptr,$a_ptr,@a[4]\n\tadcs\t@a[3],@a[3],@a[3]\n\t orr\t$a_ptr,$a_ptr,@a[5]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t@b[0],xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t@b[0],@b[0],xzr\n\n\tmvn\t@b[0],@b[0]\n\tand\t@b[0],@b[0],#2\n\torr\t$r_ptr,$r_ptr,@b[0]\n\n\tcmp\t$n_ptr,#0\n\tcsel\t$n_ptr,$r_ptr,$b_ptr,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\t$a_ptr,#0\n\tcsel\t$a_ptr,$r_ptr,$b_ptr,ne\t// a->im!=0? 
sgn0(a->im) : sgn0(a->re)\n\n\tand\t$n_ptr,$n_ptr,#1\n\tand\t$a_ptr,$a_ptr,#2\n\torr\t$r_ptr,$a_ptr,$n_ptr\t// pack sign and parity\n\n\tret\n.size\tsgn0_pty_mod_384x,.-sgn0_pty_mod_384x\n___\nif (1) {\nsub vec_select {\nmy $sz = shift;\nmy @v=map(\"v$_\",(0..5,16..21));\n\n$code.=<<___;\n.globl\tvec_select_$sz\n.hidden\tvec_select_$sz\n.type\tvec_select_$sz,%function\n.align\t5\nvec_select_$sz:\n\thint\t#34\n\tdup\tv6.2d, $n_ptr\n\tld1\t{@v[0].2d, @v[1].2d, @v[2].2d}, [$a_ptr],#48\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{@v[3].2d, @v[4].2d, @v[5].2d}, [$b_ptr],#48\n___\nfor($i=0; $i<$sz-48; $i+=48) {\n$code.=<<___;\n\tbit\t@v[0].16b, @v[3].16b, v6.16b\n\tld1\t{@v[6].2d, @v[7].2d, @v[8].2d}, [$a_ptr],#48\n\tbit\t@v[1].16b, @v[4].16b, v6.16b\n\tld1\t{@v[9].2d, @v[10].2d, @v[11].2d}, [$b_ptr],#48\n\tbit\t@v[2].16b, @v[5].16b, v6.16b\n\tst1\t{@v[0].2d, @v[1].2d, @v[2].2d}, [$r_ptr],#48\n___\n\t@v = @v[6..11,0..5];\n}\n$code.=<<___;\n\tbit\t@v[0].16b, @v[3].16b, v6.16b\n\tbit\t@v[1].16b, @v[4].16b, v6.16b\n\tbit\t@v[2].16b, @v[5].16b, v6.16b\n\tst1\t{@v[0].2d, @v[1].2d, @v[2].2d}, [$r_ptr]\n\tret\n.size\tvec_select_$sz,.-vec_select_$sz\n___\n}\n\n$code.=<<___;\n.globl\tvec_select_32\n.hidden\tvec_select_32\n.type\tvec_select_32,%function\n.align\t5\nvec_select_32:\n\thint\t#34\n\tdup\tv6.2d, $n_ptr\n\tld1\t{v0.2d, v1.2d}, [$a_ptr]\n\tcmeq\tv6.2d, v6.2d, #0\n\tld1\t{v3.2d, v4.2d}, [$b_ptr]\n\tbit\tv0.16b, v3.16b, v6.16b\n\tbit\tv1.16b, v4.16b, v6.16b\n\tst1\t{v0.2d, v1.2d}, [$r_ptr]\n\tret\n.size\tvec_select_32,.-vec_select_32\n___\n\nvec_select(48);\nvec_select(96);\nvec_select(192);\nvec_select(144);\nvec_select(288);\n}\n\n{\nmy ($inp, $end, $step) = map(\"x$_\", (0..2));\n\n$code.=<<___;\n.globl\tvec_prefetch\n.hidden\tvec_prefetch\n.type\tvec_prefetch,%function\n.align\t5\nvec_prefetch:\n\thint\t#34\n\tadd\t$end, $end, $inp\n\tsub\t$end, $end, #1\n\tmov\t$step, #64\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, 
hi\n\tcsel\t$step, xzr, $step, hi\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, hi\n\tcsel\t$step, xzr, $step, hi\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, hi\n\tcsel\t$step, xzr, $step, hi\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, hi\n\tcsel\t$step, xzr, $step, hi\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, hi\n\tcsel\t$step, xzr, $step, hi\n\tprfm\tpldl1keep, [$inp]\n\tadd\t$inp, $inp, $step\n\tcmp\t$inp, $end\n\tcsel\t$inp, $end, $inp, hi\n\tprfm\tpldl1keep, [$inp]\n\tret\n.size\tvec_prefetch,.-vec_prefetch\n___\nmy $len = $end;\n\n$code.=<<___;\n.globl\tvec_is_zero_16x\n.hidden\tvec_is_zero_16x\n.type\tvec_is_zero_16x,%function\n.align\t5\nvec_is_zero_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [$inp], #16\n\tlsr\t$len, $len, #4\n\tsub\t$len, $len, #1\n\tcbz\t$len, .Loop_is_zero_done\n\n.Loop_is_zero:\n\tld1\t{v1.2d}, [$inp], #16\n\torr\tv0.16b, v0.16b, v1.16b\n\tsub\t$len, $len, #1\n\tcbnz\t$len, .Loop_is_zero\n\n.Loop_is_zero_done:\n\tdup\tv1.2d, v0.2d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, v0.2d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_zero_16x,.-vec_is_zero_16x\n___\n}\n{\nmy ($inp1, $inp2, $len) = map(\"x$_\", (0..2));\n\n$code.=<<___;\n.globl\tvec_is_equal_16x\n.hidden\tvec_is_equal_16x\n.type\tvec_is_equal_16x,%function\n.align\t5\nvec_is_equal_16x:\n\thint\t#34\n\tld1\t{v0.2d}, [$inp1], #16\n\tld1\t{v1.2d}, [$inp2], #16\n\tlsr\t$len, $len, #4\n\teor\tv0.16b, v0.16b, v1.16b\n\n.Loop_is_equal:\n\tsub\t$len, $len, #1\n\tcbz\t$len, .Loop_is_equal_done\n\tld1\t{v1.2d}, [$inp1], #16\n\tld1\t{v2.2d}, [$inp2], #16\n\teor\tv1.16b, v1.16b, v2.16b\n\torr\tv0.16b, v0.16b, v1.16b\n\tb\t.Loop_is_equal\n\tnop\n\n.Loop_is_equal_done:\n\tdup\tv1.2d, v0.2d[1]\n\torr\tv0.16b, v0.16b, v1.16b\n\tumov\tx1, 
v0.2d[0]\n\tmov\tx0, #1\n\tcmp\tx1, #0\n\tcsel\tx0, x0, xzr, eq\n\tret\n.size\tvec_is_equal_16x,.-vec_is_equal_16x\n___\n}\n\nprint $code;\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/add_mod_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n{ ############################################################## 384 bits add\nmy @acc=map(\"%r$_\",(8..15, \"ax\", \"bx\", \"bp\"));\n   push(@acc, $a_ptr);\n\n$code.=<<___;\n.text\n\n.globl\tadd_mod_384\n.hidden\tadd_mod_384\n.type\tadd_mod_384,\\@function,4,\"unwind\"\n.align\t32\nadd_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tcall\t__add_mod_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tadd_mod_384,.-add_mod_384\n\n.type\t__add_mod_384,\\@abi-omnipotent\n.align\t32\n__add_mod_384:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), 
@acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n__add_mod_384_a_is_loaded:\n\tadd\t8*0($b_org), @acc[0]\n\tadc\t8*1($b_org), @acc[1]\n\tadc\t8*2($b_org), @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t8*3($b_org), @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t8*4($b_org), @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t8*5($b_org), @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t@acc[9],  @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.globl\tadd_mod_384x\n.hidden\tadd_mod_384x\n.type\tadd_mod_384x,\\@function,4,\"unwind\"\n.align\t32\nadd_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$24, %rsp\n.cfi_adjust_cfa_offset\t24\n.cfi_end_prologue\n\n\tmov\t$a_ptr, 8*0(%rsp)\n\tmov\t$b_org, 8*1(%rsp)\n\tlea\t48($a_ptr), $a_ptr\t# a->im\n\tlea\t48($b_org), $b_org\t# b->im\n\tlea\t48($r_ptr), $r_ptr\t# ret->im\n\tcall\t__add_mod_384\t\t# add_mod_384(ret->im, a->im, b->im, mod);\n\n\tmov\t8*0(%rsp), $a_ptr\t# a->re\n\tmov\t8*1(%rsp), $b_org\t# b->re\n\tlea\t-48($r_ptr), $r_ptr\t# ret->re\n\tcall\t__add_mod_384\t\t# add_mod_384(ret->re, a->re, b->re, 
mod);\n\n\tmov\t24+8*0(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t24+8*1(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24+8*2(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t24+8*3(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t24+8*4(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t24+8*5(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24+8*6(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tadd_mod_384x,.-add_mod_384x\n\n########################################################################\n.globl\trshift_mod_384\n.hidden\trshift_mod_384\n.type\trshift_mod_384,\\@function,4,\"unwind\"\n.align\t32\nrshift_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n.Loop_rshift_mod_384:\n\tcall\t__rshift_mod_384\n\tdec\t%edx\n\tjnz\t.Loop_rshift_mod_384\n\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\trshift_mod_384,.-rshift_mod_384\n\n.type\t__rshift_mod_384,\\@abi-omnipotent\n.align\t32\n__rshift_mod_384:\n\tmov\t\\$1, @acc[11]\n\tmov\t8*0($n_ptr), @acc[6]\n\tand\t@acc[0], @acc[11]\n\tmov\t8*1($n_ptr), @acc[7]\n\tneg\t@acc[11]\n\tmov\t8*2($n_ptr), 
@acc[8]\n\tand\t@acc[11], @acc[6]\n\tmov\t8*3($n_ptr), @acc[9]\n\tand\t@acc[11], @acc[7]\n\tmov\t8*4($n_ptr), @acc[10]\n\tand\t@acc[11], @acc[8]\n\tand\t@acc[11], @acc[9]\n\tand\t@acc[11], @acc[10]\n\tand\t8*5($n_ptr), @acc[11]\n\n\tadd\t@acc[0], @acc[6]\n\tadc\t@acc[1], @acc[7]\n\tadc\t@acc[2], @acc[8]\n\tadc\t@acc[3], @acc[9]\n\tadc\t@acc[4], @acc[10]\n\tadc\t@acc[5], @acc[11]\n\tsbb\t@acc[5], @acc[5]\n\n\tshr\t\\$1, @acc[6]\n\tmov\t@acc[7], @acc[0]\n\tshr\t\\$1, @acc[7]\n\tmov\t@acc[8], @acc[1]\n\tshr\t\\$1, @acc[8]\n\tmov\t@acc[9], @acc[2]\n\tshr\t\\$1, @acc[9]\n\tmov\t@acc[10], @acc[3]\n\tshr\t\\$1, @acc[10]\n\tmov\t@acc[11], @acc[4]\n\tshr\t\\$1, @acc[11]\n\tshl\t\\$63, @acc[0]\n\tshl\t\\$63, @acc[1]\n\tor\t@acc[6], @acc[0]\n\tshl\t\\$63, @acc[2]\n\tor\t@acc[7], @acc[1]\n\tshl\t\\$63, @acc[3]\n\tor\t@acc[8], @acc[2]\n\tshl\t\\$63, @acc[4]\n\tor\t@acc[9], @acc[3]\n\tshl\t\\$63, @acc[5]\n\tor\t@acc[10], @acc[4]\n\tor\t@acc[11], @acc[5]\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[6]\n.size\t__rshift_mod_384,.-__rshift_mod_384\n\n.globl\tdiv_by_2_mod_384\n.hidden\tdiv_by_2_mod_384\n.type\tdiv_by_2_mod_384,\\@function,3,\"unwind\"\n.align\t32\ndiv_by_2_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t$b_org, $n_ptr\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tcall\t__rshift_mod_384\n\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 
8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tdiv_by_2_mod_384,.-div_by_2_mod_384\n\n########################################################################\n.globl\tlshift_mod_384\n.hidden\tlshift_mod_384\n.type\tlshift_mod_384,\\@function,4,\"unwind\"\n.align\t32\nlshift_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n.Loop_lshift_mod_384:\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t@acc[3], @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t@acc[4], @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t@acc[5], @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$r_ptr, $r_ptr\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $r_ptr\n\n\tmov\t(%rsp), $r_ptr\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tcmovc\t@acc[9],  @acc[3]\n\tcmovc\t@acc[10], @acc[4]\n\tcmovc\t@acc[11], @acc[5]\n\n\tdec\t%edx\n\tjnz\t.Loop_lshift_mod_384\n\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 
8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tlshift_mod_384,.-lshift_mod_384\n\n.type\t__lshift_mod_384,\\@abi-omnipotent\n.align\t32\n__lshift_mod_384:\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t@acc[3], @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t@acc[4], @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t@acc[5], @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tcmovc\t@acc[9],  @acc[3]\n\tcmovc\t@acc[10], @acc[4]\n\tcmovc\t@acc[11], @acc[5]\n\n\tret\n.size\t__lshift_mod_384,.-__lshift_mod_384\n\n########################################################################\n.globl\tmul_by_3_mod_384\n.hidden\tmul_by_3_mod_384\n.type\tmul_by_3_mod_384,\\@function,3,\"unwind\"\n.align\t32\nmul_by_3_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$a_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t$b_org, 
$n_ptr\n\n\tcall\t__lshift_mod_384\n\n\tmov\t(%rsp), $b_org\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_3_mod_384,.-mul_by_3_mod_384\n\n.globl\tmul_by_8_mod_384\n.hidden\tmul_by_8_mod_384\n.type\tmul_by_8_mod_384,\\@function,3,\"unwind\"\n.align\t32\nmul_by_8_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t$b_org, $n_ptr\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 
8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_8_mod_384,.-mul_by_8_mod_384\n\n########################################################################\n.globl\tmul_by_3_mod_384x\n.hidden\tmul_by_3_mod_384x\n.type\tmul_by_3_mod_384x,\\@function,3,\"unwind\"\n.align\t32\nmul_by_3_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$a_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t$b_org, $n_ptr\n\n\tcall\t__lshift_mod_384\n\n\tmov\t(%rsp), $b_org\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\t(%rsp), $a_ptr\n\tlea\t8*6($r_ptr), $r_ptr\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*6($a_ptr), @acc[0]\n\tmov\t8*7($a_ptr), @acc[1]\n\tmov\t8*8($a_ptr), @acc[2]\n\tmov\t8*9($a_ptr), @acc[3]\n\tmov\t8*10($a_ptr), @acc[4]\n\tmov\t8*11($a_ptr), @acc[5]\n\n\tcall\t__lshift_mod_384\n\n\tmov\t\\$8*6, $b_org\n\tadd\t(%rsp), 
$b_org\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__add_mod_384_a_is_loaded\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_3_mod_384x,.-mul_by_3_mod_384x\n\n.globl\tmul_by_8_mod_384x\n.hidden\tmul_by_8_mod_384x\n.type\tmul_by_8_mod_384x,\\@function,3,\"unwind\"\n.align\t32\nmul_by_8_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$a_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t$b_org, $n_ptr\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\t(%rsp), $a_ptr\n\tmov\t@acc[0], 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t48+8*0($a_ptr), @acc[0]\n\tmov\t48+8*1($a_ptr), @acc[1]\n\tmov\t48+8*2($a_ptr), @acc[2]\n\tmov\t48+8*3($a_ptr), @acc[3]\n\tmov\t48+8*4($a_ptr), @acc[4]\n\tmov\t48+8*5($a_ptr), @acc[5]\n\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\tcall\t__lshift_mod_384\n\n\tmov\t@acc[0], 48+8*0($r_ptr)\n\tmov\t@acc[1], 48+8*1($r_ptr)\n\tmov\t@acc[2], 48+8*2($r_ptr)\n\tmov\t@acc[3], 48+8*3($r_ptr)\n\tmov\t@acc[4], 48+8*4($r_ptr)\n\tmov\t@acc[5], 
48+8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_8_mod_384x,.-mul_by_8_mod_384x\n\n########################################################################\n.globl\tcneg_mod_384\n.hidden\tcneg_mod_384\n.type\tcneg_mod_384,\\@function,4,\"unwind\"\n.align\t32\ncneg_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$b_org\t\t\t# condition flag\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), $b_org\t# load a[0:5]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t$b_org, @acc[0]\n\tmov\t8*3($a_ptr), @acc[3]\n\tor\t@acc[1], $b_org\n\tmov\t8*4($a_ptr), @acc[4]\n\tor\t@acc[2], $b_org\n\tmov\t8*5($a_ptr), @acc[5]\n\tor\t@acc[3], $b_org\n\tmov\t\\$-1, @acc[11]\n\tor\t@acc[4], $b_org\n\tor\t@acc[5], $b_org\n\n\tmov\t8*0($n_ptr), @acc[6]\t# load n[0:5]\n\tcmovnz\t@acc[11], $b_org\t# mask = a[0:5] ? -1 : 0\n\tmov\t8*1($n_ptr), @acc[7]\n\tmov\t8*2($n_ptr), @acc[8]\n\tand\t$b_org, @acc[6]\t\t# n[0:5] &= mask\n\tmov\t8*3($n_ptr), @acc[9]\n\tand\t$b_org, @acc[7]\n\tmov\t8*4($n_ptr), @acc[10]\n\tand\t$b_org, @acc[8]\n\tmov\t8*5($n_ptr), @acc[11]\n\tand\t$b_org, @acc[9]\n\tmov\t0(%rsp), $n_ptr\t\t# restore condition flag\n\tand\t$b_org, @acc[10]\n\tand\t$b_org, @acc[11]\n\n\tsub\t@acc[0], @acc[6]\t# a[0:5] ? 
n[0:5]-a[0:5] : 0-0\n\tsbb\t@acc[1], @acc[7]\n\tsbb\t@acc[2], @acc[8]\n\tsbb\t@acc[3], @acc[9]\n\tsbb\t@acc[4], @acc[10]\n\tsbb\t@acc[5], @acc[11]\n\n\tor\t$n_ptr, $n_ptr\t\t# check condition flag\n\n\tcmovz\t@acc[0], @acc[6]\t# flag ? n[0:5]-a[0:5] : a[0:5]\n\tcmovz\t@acc[1], @acc[7]\n\tcmovz\t@acc[2], @acc[8]\n\tmov\t@acc[6], 8*0($r_ptr)\n\tcmovz\t@acc[3], @acc[9]\n\tmov\t@acc[7], 8*1($r_ptr)\n\tcmovz\t@acc[4], @acc[10]\n\tmov\t@acc[8], 8*2($r_ptr)\n\tcmovz\t@acc[5], @acc[11]\n\tmov\t@acc[9], 8*3($r_ptr)\n\tmov\t@acc[10], 8*4($r_ptr)\n\tmov\t@acc[11], 8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tcneg_mod_384,.-cneg_mod_384\n\n########################################################################\n.globl\tsub_mod_384\n.hidden\tsub_mod_384\n.type\tsub_mod_384,\\@function,4,\"unwind\"\n.align\t32\nsub_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tcall\t__sub_mod_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsub_mod_384,.-sub_mod_384\n\n.type\t__sub_mod_384,\\@abi-omnipotent\n.align\t32\n__sub_mod_384:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), 
@acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tsub\t8*0($b_org), @acc[0]\n\t mov\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($b_org), @acc[1]\n\t mov\t8*1($n_ptr), @acc[7]\n\tsbb\t8*2($b_org), @acc[2]\n\t mov\t8*2($n_ptr), @acc[8]\n\tsbb\t8*3($b_org), @acc[3]\n\t mov\t8*3($n_ptr), @acc[9]\n\tsbb\t8*4($b_org), @acc[4]\n\t mov\t8*4($n_ptr), @acc[10]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t8*5($n_ptr), @acc[11]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[6]\n\tand\t$b_org, @acc[7]\n\tand\t$b_org, @acc[8]\n\tand\t$b_org, @acc[9]\n\tand\t$b_org, @acc[10]\n\tand\t$b_org, @acc[11]\n\n\tadd\t@acc[6], @acc[0]\n\tadc\t@acc[7], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[8], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[9], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tadc\t@acc[10], @acc[4]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tadc\t@acc[11], @acc[5]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tsub_mod_384x\n.hidden\tsub_mod_384x\n.type\tsub_mod_384x,\\@function,4,\"unwind\"\n.align\t32\nsub_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$24, %rsp\n.cfi_adjust_cfa_offset\t24\n.cfi_end_prologue\n\n\tmov\t$a_ptr, 8*0(%rsp)\n\tmov\t$b_org, 8*1(%rsp)\n\tlea\t48($a_ptr), $a_ptr\t# a->im\n\tlea\t48($b_org), $b_org\t# b->im\n\tlea\t48($r_ptr), $r_ptr\t# ret->im\n\tcall\t__sub_mod_384\t\t# sub_mod_384(ret->im, a->im, b->im, mod);\n\n\tmov\t8*0(%rsp), $a_ptr\t# a->re\n\tmov\t8*1(%rsp), $b_org\t# b->re\n\tlea\t-48($r_ptr), $r_ptr\t# ret->re\n\tcall\t__sub_mod_384\t\t# sub_mod_384(ret->re, a->re, b->re, 
mod);\n\n\tmov\t24+8*0(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t24+8*1(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24+8*2(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t24+8*3(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t24+8*4(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t24+8*5(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24+8*6(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsub_mod_384x,.-sub_mod_384x\n___\n}\n{ ###################################################### ret = a * (1 + i)\nmy ($r_ptr,$a_ptr,$n_ptr) = (\"%rdi\",\"%rsi\",\"%rdx\");\nmy @acc=map(\"%r$_\",(8..15, \"ax\", \"bx\", \"cx\", \"bp\"));\n\n$code.=<<___;\n.globl\tmul_by_1_plus_i_mod_384x\n.hidden\tmul_by_1_plus_i_mod_384x\n.type\tmul_by_1_plus_i_mod_384x,\\@function,3,\"unwind\"\n.align\t32\nmul_by_1_plus_i_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$56, %rsp\n.cfi_adjust_cfa_offset\t56\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tmov\t@acc[0], @acc[6]\n\tadd\t8*6($a_ptr), @acc[0]\t# a->re + a->im\n\tmov\t@acc[1], @acc[7]\n\tadc\t8*7($a_ptr), @acc[1]\n\tmov\t@acc[2], @acc[8]\n\tadc\t8*8($a_ptr), @acc[2]\n\tmov\t@acc[3], @acc[9]\n\tadc\t8*9($a_ptr), @acc[3]\n\tmov\t@acc[4], @acc[10]\n\tadc\t8*10($a_ptr), @acc[4]\n\tmov\t@acc[5], @acc[11]\n\tadc\t8*11($a_ptr), @acc[5]\n\tmov\t$r_ptr, 8*6(%rsp)\t# offload r_ptr\n\tsbb\t$r_ptr, $r_ptr\n\n\tsub\t8*6($a_ptr), @acc[6]\t# a->re - a->im\n\tsbb\t8*7($a_ptr), @acc[7]\n\tsbb\t8*8($a_ptr), @acc[8]\n\tsbb\t8*9($a_ptr), @acc[9]\n\tsbb\t8*10($a_ptr), @acc[10]\n\tsbb\t8*11($a_ptr), @acc[11]\n\tsbb\t$a_ptr, $a_ptr\n\n\tmov\t@acc[0], 8*0(%rsp)\t# offload a->re + a->im [without 
carry]\n\t mov\t8*0($n_ptr), @acc[0]\n\tmov\t@acc[1], 8*1(%rsp)\n\t mov\t8*1($n_ptr), @acc[1]\n\tmov\t@acc[2], 8*2(%rsp)\n\t mov\t8*2($n_ptr), @acc[2]\n\tmov\t@acc[3], 8*3(%rsp)\n\t mov\t8*3($n_ptr), @acc[3]\n\tmov\t@acc[4], 8*4(%rsp)\n\t and\t$a_ptr, @acc[0]\n\t mov\t8*4($n_ptr), @acc[4]\n\tmov\t@acc[5], 8*5(%rsp)\n\t and\t$a_ptr, @acc[1]\n\t mov\t8*5($n_ptr), @acc[5]\n\t and\t$a_ptr, @acc[2]\n\t and\t$a_ptr, @acc[3]\n\t and\t$a_ptr, @acc[4]\n\t and\t$a_ptr, @acc[5]\n\tmov\t8*6(%rsp), $a_ptr\t# restore r_ptr\n\n\tadd\t@acc[0], @acc[6]\n\t mov\t8*0(%rsp), @acc[0]\t# restore a->re + a->im\n\tadc\t@acc[1], @acc[7]\n\t mov\t8*1(%rsp), @acc[1]\n\tadc\t@acc[2], @acc[8]\n\t mov\t8*2(%rsp), @acc[2]\n\tadc\t@acc[3], @acc[9]\n\t mov\t8*3(%rsp), @acc[3]\n\tadc\t@acc[4], @acc[10]\n\t mov\t8*4(%rsp), @acc[4]\n\tadc\t@acc[5], @acc[11]\n\t mov\t8*5(%rsp), @acc[5]\n\n\tmov\t@acc[6], 8*0($a_ptr)\t# ret->re = a->re - a->im\n\t mov\t@acc[0], @acc[6]\n\tmov\t@acc[7], 8*1($a_ptr)\n\tmov\t@acc[8], 8*2($a_ptr)\n\t mov\t@acc[1], @acc[7]\n\tmov\t@acc[9], 8*3($a_ptr)\n\tmov\t@acc[10], 8*4($a_ptr)\n\t mov\t@acc[2], @acc[8]\n\tmov\t@acc[11], 8*5($a_ptr)\n\n\tsub\t8*0($n_ptr), @acc[0]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $r_ptr\n\n\tcmovc\t@acc[6], @acc[0]\n\tcmovc\t@acc[7], @acc[1]\n\tcmovc\t@acc[8], @acc[2]\n\tmov\t@acc[0], 8*6($a_ptr)\t# ret->im = a->re + a->im\n\tcmovc\t@acc[9], @acc[3]\n\tmov\t@acc[1], 8*7($a_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2], 8*8($a_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3], 8*9($a_ptr)\n\tmov\t@acc[4], 8*10($a_ptr)\n\tmov\t@acc[5], 
8*11($a_ptr)\n\n\tmov\t56+8*0(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t56+8*1(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t56+8*2(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t56+8*3(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t56+8*4(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t56+8*5(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56+8*6(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_by_1_plus_i_mod_384x,.-mul_by_1_plus_i_mod_384x\n___\n}\n{ ######################################################\nmy ($r_ptr,$n_ptr) = (\"%rdi\",\"%rsi\");\nmy @acc=map(\"%r$_\",(8..11, \"cx\", \"dx\", \"bx\", \"bp\"));\n\n$code.=<<___;\n.globl\tsgn0_pty_mod_384\n.hidden\tsgn0_pty_mod_384\n.type\tsgn0_pty_mod_384,\\@function,2,\"unwind\"\n.align\t32\nsgn0_pty_mod_384:\n.cfi_startproc\n.cfi_end_prologue\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($r_ptr), @acc[0]\n\tmov\t8*1($r_ptr), @acc[1]\n\tmov\t8*2($r_ptr), @acc[2]\n\tmov\t8*3($r_ptr), @acc[3]\n\tmov\t8*4($r_ptr), @acc[4]\n\tmov\t8*5($r_ptr), @acc[5]\n\n\txor\t%rax, %rax\n\tmov\t@acc[0], $r_ptr\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, $r_ptr\n\tand\t\\$2, %rax\n\tor\t$r_ptr, %rax\t\t# pack sign and parity\n\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0_pty_mod_384,.-sgn0_pty_mod_384\n\n.globl\tsgn0_pty_mod_384x\n.hidden\tsgn0_pty_mod_384x\n.type\tsgn0_pty_mod_384x,\\@function,2,\"unwind\"\n.align\t32\nsgn0_pty_mod_384x:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tsub\t\\$8, 
%rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*6($r_ptr), @acc[0]\t# sgn0(a->im)\n\tmov\t8*7($r_ptr), @acc[1]\n\tmov\t8*8($r_ptr), @acc[2]\n\tmov\t8*9($r_ptr), @acc[3]\n\tmov\t8*10($r_ptr), @acc[4]\n\tmov\t8*11($r_ptr), @acc[5]\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], @acc[0]\n\tor\t@acc[5], @acc[0]\n\n\tlea\t0($r_ptr), %rax\t\t# sgn0(a->re)\n\txor\t$r_ptr, $r_ptr\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, $r_ptr\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $r_ptr\n\n\tmov\t@acc[0], 0(%rsp)\t# a->im is zero or not\n\tnot\t$r_ptr\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, $r_ptr\n\tor\t@acc[7], $r_ptr\t\t# pack sign and parity\n\n\tmov\t8*0(%rax), @acc[0]\n\tmov\t8*1(%rax), @acc[1]\n\tmov\t8*2(%rax), @acc[2]\n\tmov\t8*3(%rax), @acc[3]\n\tmov\t8*4(%rax), @acc[4]\n\tmov\t8*5(%rax), @acc[5]\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], @acc[0]\n\tor\t@acc[5], @acc[0]\n\n\txor\t%rax, %rax\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tmov\t0(%rsp), @acc[6]\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\n\ttest\t@acc[0], @acc[0]\n\tcmovz\t$r_ptr, @acc[7]\t\t# a->re==0? 
prty(a->im) : prty(a->re)\n\n\ttest\t@acc[6], @acc[6]\n\tcmovnz\t$r_ptr, %rax\t\t# a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, %rax\n\tor\t@acc[7], %rax\t\t# pack sign and parity\n\n\tmov\t8(%rsp), %rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp), %rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp), %rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0_pty_mod_384x,.-sgn0_pty_mod_384x\n___\n}\nif (0) {\nmy $inp = $win64 ? \"%rcx\" : \"%rdi\";\n$code.=<<___;\n.globl\tnbits_384\n.hidden\tnbits_384\n.type\tnbits_384,\\@abi-omnipotent\n.align\t32\nnbits_384:\n\tmov\t8*5($inp), %r8\n\tmov\t8*4($inp), %r9\n\tmov\t8*3($inp), %r10\n\tmov\t8*2($inp), %r11\n\tmov\t\\$-1, %rdx\n\tmov\t\\$127, %eax\n\tbsr\t%r8, %r8\n\tcmovnz\t%rdx,%r9\n\tcmovz\t%rax,%r8\n\tbsr\t%r9, %r9\n\tcmovnz\t%rdx,%r10\n\tcmovz\t%rax,%r9\n\txor\t\\$63,%r8\n\tbsr\t%r10, %r10\n\tcmovnz\t%rdx, %r11\n\tcmovz\t%rax, %r10\n\txor\t\\$63,%r9\n\tadd\t%r8, %r9\n\tmov\t8*1($inp), %r8\n\tbsr\t%r11, %r11\n\tcmovnz\t%rdx, %r8\n\tcmovz\t%rax, %r11\n\txor\t\\$63, %r10\n\tadd\t%r9, %r10\n\tmov\t8*0($inp), %r9\n\tbsr\t%r8, %r8\n\tcmovnz\t%rdx, %r9\n\tcmovz\t%rax, %r8\n\txor\t\\$63, %r11\n\tadd\t%r10, %r11\n\tbsr\t%r9, %r9\n\tcmovz\t%rax, %r9\n\txor\t\\$63, %r8\n\tadd\t%r11, %r8\n\txor\t\\$63, %r9\n\tadd\t%r8, %r9\n\tmov\t\\$384, %eax\n\tsub\t%r9, %rax\n\tret\n.size\tnbits_384,.-nbits_384\n___\n}\n\nif (1) {\nmy ($out, $inp1, $inp2, $select) = $win64 ? 
(\"%rcx\", \"%rdx\", \"%r8\", \"%r9d\")\n                                          : (\"%rdi\", \"%rsi\", \"%rdx\", \"%ecx\");\n\nsub vec_select {\nmy $sz = shift;\nmy $half = $sz/2;\nmy ($xmm0,$xmm1,$xmm2,$xmm3)=map(\"%xmm$_\",(0..3));\n\n$code.=<<___;\n.globl\tvec_select_$sz\n.hidden\tvec_select_$sz\n.type\tvec_select_$sz,\\@abi-omnipotent\n.align\t32\nvec_select_$sz:\n\tmovd\t$select, %xmm5\n\tpxor\t%xmm4,%xmm4\n\tpshufd\t\\$0,%xmm5,%xmm5\t\t# broadcast\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t($inp1),$xmm0\n\tlea\t$half($inp1),$inp1\n\tpcmpeqd\t%xmm4,%xmm5\n\tmovdqu\t($inp2),$xmm1\n\tlea\t$half($inp2),$inp2\n\tpcmpeqd\t%xmm5,%xmm4\n\tlea\t$half($out),$out\n___\nfor($i=0; $i<$sz-16; $i+=16) {\n$code.=<<___;\n\tpand\t%xmm4,$xmm0\n\tmovdqu\t$i+16-$half($inp1),$xmm2\n\tpand\t%xmm5,$xmm1\n\tmovdqu\t$i+16-$half($inp2),$xmm3\n\tpor\t$xmm1,$xmm0\n\tmovdqu\t$xmm0,$i-$half($out)\n___\n\t($xmm0,$xmm1,$xmm2,$xmm3)=($xmm2,$xmm3,$xmm0,$xmm1);\n}\n$code.=<<___;\n\tpand\t%xmm4,$xmm0\n\tpand\t%xmm5,$xmm1\n\tpor\t$xmm1,$xmm0\n\tmovdqu\t$xmm0,$i-$half($out)\n\tret\n.size\tvec_select_$sz,.-vec_select_$sz\n___\n}\nvec_select(32);\nvec_select(48);\nvec_select(96);\nvec_select(192);\nvec_select(144);\nvec_select(288);\n}\n\n{\nmy ($inp, $end) = $win64 ? 
(\"%rcx\", \"%rdx\") : (\"%rdi\", \"%rsi\");\n\n$code.=<<___;\n.globl\tvec_prefetch\n.hidden\tvec_prefetch\n.type\tvec_prefetch,\\@abi-omnipotent\n.align\t32\nvec_prefetch:\n\tleaq\t\t-1($inp,$end), $end\n\tmov\t\t\\$64, %rax\n\txor\t\t%r8, %r8\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tcmova\t\t%r8, %rax\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tcmova\t\t%r8, %rax\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tcmova\t\t%r8, %rax\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tcmova\t\t%r8, %rax\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tcmova\t\t%r8, %rax\n\tprefetchnta\t($inp)\n\tlea\t\t($inp,%rax), $inp\n\tcmp\t\t$end, $inp\n\tcmova\t\t$end, $inp\n\tprefetchnta\t($inp)\n\tret\n.size\tvec_prefetch,.-vec_prefetch\n___\nmy $len = $win64 ? \"%edx\" : \"%esi\";\n\n$code.=<<___;\n.globl\tvec_is_zero_16x\n.hidden\tvec_is_zero_16x\n.type\tvec_is_zero_16x,\\@abi-omnipotent\n.align\t32\nvec_is_zero_16x:\n\tshr\t\t\\$4, $len\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t\t($inp), %xmm0\n\tlea\t\t16($inp), $inp\n\n.Loop_is_zero:\n\tdec\t\t$len\n\tjz\t\t.Loop_is_zero_done\n\tmovdqu\t\t($inp), %xmm1\n\tlea\t\t16($inp), $inp\n\tpor\t\t%xmm1, %xmm0\n\tjmp\t\t.Loop_is_zero\n\n.Loop_is_zero_done:\n\tpshufd\t\t\\$0x4e, %xmm0, %xmm1\n\tpor\t\t%xmm1, %xmm0\n\tmovq\t\t%xmm0, %rax\n\tinc\t\t$len\t\t\t# now it's 1\n\ttest\t\t%rax, %rax\n\tcmovnz\t\t$len, %eax\n\txor\t\t\\$1, %eax\n\tret\n.size\tvec_is_zero_16x,.-vec_is_zero_16x\n___\n}\n{\nmy ($inp1, $inp2, $len) = $win64 ? 
(\"%rcx\", \"%rdx\", \"%r8d\")\n                                 : (\"%rdi\", \"%rsi\", \"%edx\");\n$code.=<<___;\n.globl\tvec_is_equal_16x\n.hidden\tvec_is_equal_16x\n.type\tvec_is_equal_16x,\\@abi-omnipotent\n.align\t32\nvec_is_equal_16x:\n\tshr\t\t\\$4, $len\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmovdqu\t\t($inp1), %xmm0\n\tmovdqu\t\t($inp2), %xmm1\n\tsub\t\t$inp1, $inp2\n\tlea\t\t16($inp1), $inp1\n\tpxor\t\t%xmm1, %xmm0\n\n.Loop_is_equal:\n\tdec\t\t$len\n\tjz\t\t.Loop_is_equal_done\n\tmovdqu\t\t($inp1), %xmm1\n\tmovdqu\t\t($inp1,$inp2), %xmm2\n\tlea\t\t16($inp1), $inp1\n\tpxor\t\t%xmm2, %xmm1\n\tpor\t\t%xmm1, %xmm0\n\tjmp\t\t.Loop_is_equal\n\n.Loop_is_equal_done:\n\tpshufd\t\t\\$0x4e, %xmm0, %xmm1\n\tpor\t\t%xmm1, %xmm0\n\tmovq\t\t%xmm0, %rax\n\tinc\t\t$len\t\t\t# now it's 1\n\ttest\t\t%rax, %rax\n\tcmovnz\t\t$len, %eax\n\txor\t\t\\$1, %eax\n\tret\n.size\tvec_is_equal_16x,.-vec_is_equal_16x\n___\n}\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/add_mod_384x384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n# common accumulator layout\n@acc=map(\"%r$_\",(8..15));\n\n############################################################ 384x384 add/sub\n# Double-width addition/subtraction modulo n<<384, as opposed to\n# the naively expected modulo n*n. 
It works because n<<384 is the actual\n# input boundary condition for Montgomery reduction, not n*n.\n# Just in case, this is duplicated, but only one module is\n# supposed to be linked...\n{\nmy @acc=(@acc,\"%rax\",\"%rbx\",\"%rbp\",$a_ptr);\t# all registers are affected\n\t\t\t\t\t\t# except for $n_ptr and $r_ptr\n$code.=<<___;\n.text\n\n.globl\tadd_mod_384x384\n.hidden\tadd_mod_384x384\n.type\tadd_mod_384x384,\\@function,4,\"unwind\"\n.align\t32\nadd_mod_384x384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t8*6($a_ptr), @acc[6]\n\n\tadd\t8*0($b_org), @acc[0]\n\tmov\t8*7($a_ptr), @acc[7]\n\tadc\t8*1($b_org), @acc[1]\n\tmov\t8*8($a_ptr), @acc[8]\n\tadc\t8*2($b_org), @acc[2]\n\tmov\t8*9($a_ptr), @acc[9]\n\tadc\t8*3($b_org), @acc[3]\n\tmov\t8*10($a_ptr), @acc[10]\n\tadc\t8*4($b_org), @acc[4]\n\tmov\t8*11($a_ptr), @acc[11]\n\tadc\t8*5($b_org), @acc[5]\n\t mov\t@acc[0], 8*0($r_ptr)\n\tadc\t8*6($b_org), @acc[6]\n\t mov\t@acc[1], 8*1($r_ptr)\n\tadc\t8*7($b_org), @acc[7]\n\t mov\t@acc[2], 8*2($r_ptr)\n\tadc\t8*8($b_org), @acc[8]\n\t mov\t@acc[4], 8*4($r_ptr)\n\t mov\t@acc[6], @acc[0]\n\tadc\t8*9($b_org), @acc[9]\n\t mov\t@acc[3], 8*3($r_ptr)\n\t mov\t@acc[7], @acc[1]\n\tadc\t8*10($b_org), @acc[10]\n\t mov\t@acc[5], 8*5($r_ptr)\n\t mov\t@acc[8], @acc[2]\n\tadc\t8*11($b_org), @acc[11]\n\t mov\t@acc[9], @acc[3]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[7]\n\t mov\t@acc[10], @acc[4]\n\tsbb\t8*2($n_ptr), @acc[8]\n\tsbb\t8*3($n_ptr), @acc[9]\n\tsbb\t8*4($n_ptr), @acc[10]\n\t mov\t@acc[11], 
@acc[5]\n\tsbb\t8*5($n_ptr), @acc[11]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[0], @acc[6]\n\tcmovc\t@acc[1], @acc[7]\n\tcmovc\t@acc[2], @acc[8]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tcmovc\t@acc[3], @acc[9]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tcmovc\t@acc[4], @acc[10]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tcmovc\t@acc[5], @acc[11]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tadd_mod_384x384,.-add_mod_384x384\n\n.globl\tsub_mod_384x384\n.hidden\tsub_mod_384x384\n.type\tsub_mod_384x384,\\@function,4,\"unwind\"\n.align\t32\nsub_mod_384x384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t8*6($a_ptr), @acc[6]\n\n\tsub\t8*0($b_org), @acc[0]\n\tmov\t8*7($a_ptr), @acc[7]\n\tsbb\t8*1($b_org), @acc[1]\n\tmov\t8*8($a_ptr), @acc[8]\n\tsbb\t8*2($b_org), @acc[2]\n\tmov\t8*9($a_ptr), @acc[9]\n\tsbb\t8*3($b_org), @acc[3]\n\tmov\t8*10($a_ptr), @acc[10]\n\tsbb\t8*4($b_org), @acc[4]\n\tmov\t8*11($a_ptr), @acc[11]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t@acc[0], 8*0($r_ptr)\n\tsbb\t8*6($b_org), @acc[6]\n\t mov\t8*0($n_ptr), @acc[0]\n\t mov\t@acc[1], 8*1($r_ptr)\n\tsbb\t8*7($b_org), @acc[7]\n\t mov\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[2], 8*2($r_ptr)\n\tsbb\t8*8($b_org), 
@acc[8]\n\t mov\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], 8*3($r_ptr)\n\tsbb\t8*9($b_org), @acc[9]\n\t mov\t8*3($n_ptr), @acc[3]\n\t mov\t@acc[4], 8*4($r_ptr)\n\tsbb\t8*10($b_org), @acc[10]\n\t mov\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], 8*5($r_ptr)\n\tsbb\t8*11($b_org), @acc[11]\n\t mov\t8*5($n_ptr), @acc[5]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[0]\n\tand\t$b_org, @acc[1]\n\tand\t$b_org, @acc[2]\n\tand\t$b_org, @acc[3]\n\tand\t$b_org, @acc[4]\n\tand\t$b_org, @acc[5]\n\n\tadd\t@acc[0], @acc[6]\n\tadc\t@acc[1], @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t@acc[2], @acc[8]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tadc\t@acc[3], @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t@acc[4], @acc[10]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tadc\t@acc[5], @acc[11]\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsub_mod_384x384,.-sub_mod_384x384\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/arm-xlate.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# ARM assembler distiller/adapter by \\@dot-asm.\n\nuse strict;\n\n################################################################\n# Recognized \"flavour\"-s are:\n#\n# linux[32|64]\tGNU assembler, effectively pass-through\n# ios[32|64]\tglobal symbols' decorations, PIC tweaks, etc.\n# win[32|64]\tVisual Studio armasm-specific directives\n# coff[32|64]\te.g. clang --target=arm-windows ...\n# cheri64\tL64P128 platform\n#\nmy $flavour = shift;\n   $flavour = \"linux\" if (!$flavour or $flavour eq \"void\");\n\nmy $output = shift;\nopen STDOUT,\">$output\" || die \"can't open $output: $!\";\n\nmy %GLOBALS;\nmy $dotinlocallabels = ($flavour !~ /ios/) ? 1 : 0;\nmy $in_proc;\t# used with 'windows' flavour\n\n################################################################\n# directives which need special treatment on different platforms\n################################################################\nmy $arch = sub { } if ($flavour !~ /linux|coff64/);# omit .arch\nmy $fpu  = sub { } if ($flavour !~ /linux/);       # omit .fpu\n\nmy $rodata = sub {\n    SWITCH: for ($flavour) {\n\t/linux|cheri/\t&& return \".section\\t.rodata\";\n\t/ios/\t\t&& return \".section\\t__TEXT,__const\";\n\t/coff/\t\t&& return \".section\\t.rdata,\\\"dr\\\"\";\n\t/win/\t\t&& return \"\\tAREA\\t|.rdata|,DATA,READONLY,ALIGN=8\";\n\tlast;\n    }\n};\n\nmy $hidden = sub {\n    if ($flavour =~ /ios/)\t{ \".private_extern\\t\".join(',',@_); }\n} if ($flavour !~ /linux|cheri/);\n\nmy $comm = sub {\n    my @args = split(/,\\s*/,shift);\n    my $name = @args[0];\n    my $global = \\$GLOBALS{$name};\n    my $ret;\n\n    if ($flavour =~ /ios32/)\t{\n\t$ret = \".comm\\t_$name,@args[1]\\n\";\n\t$ret .= \".non_lazy_symbol_pointer\\n\";\n\t$ret .= \"$name:\\n\";\n\t$ret .= \".indirect_symbol\\t_$name\\n\";\n\t$ret .= 
\".long\\t0\\n\";\n\t$ret .= \".previous\";\n\t$name = \"_$name\";\n    } elsif ($flavour =~ /ios64/) {\n\t$name = \"_$name\";\n\t$ret = \".comm\\t$name,@args[1]\";\n    } elsif ($flavour =~ /win/) {\n\t$ret = \"\\tCOMMON\\t|$name|,@args[1]\";\n    } elsif ($flavour =~ /coff/) {\n\t$ret = \".comm\\t$name,@args[1]\";\n    } else {\n\t$ret = \".comm\\t\".join(',',@args);\n    }\n\n    $$global = $name;\n    $ret;\n};\n\nmy $globl = sub {\n    my $name = shift;\n    my $global = \\$GLOBALS{$name};\n    my $ret;\n\n    SWITCH: for ($flavour) {\n\t/ios/\t\t&& do { $name = \"_$name\"; last; };\n\t/win/\t\t&& do { $ret = \"\"; last; };\n    }\n\n    $ret = \".globl\t$name\" if (!defined($ret));\n    $$global = $name;\n    $ret;\n};\nmy $global = $globl;\n\nmy $extern = sub {\n    &$globl(@_);\n    if ($flavour =~ /win/) {\n\treturn \"\\tEXTERN\\t@_\";\n    }\n    return;\t# return nothing\n};\n\nmy $type = sub {\n    my $arg = join(',',@_);\n    my $ret;\n\n    SWITCH: for ($flavour) {\n\t/ios32/\t\t&& do { if ($arg =~ /(\\w+),\\s*%function/) {\n\t\t\t\t    $ret = \"#ifdef __thumb2__\\n\" .\n\t\t\t\t\t   \".thumb_func\t$1\\n\" .\n\t\t\t\t\t   \"#endif\";\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t/win/\t\t&& do { if ($arg =~ /(\\w+),\\s*%(function|object)/) {\n\t\t\t\t    my $type = \"[DATA]\";\n\t\t\t\t    if ($2 eq \"function\") {\n\t\t\t\t\t$in_proc = $1;\n\t\t\t\t\t$type = \"[FUNC]\";\n\t\t\t\t    }\n\t\t\t\t    $ret = $GLOBALS{$1} ? 
\"\\tEXPORT\\t|$1|$type\"\n\t\t\t\t\t\t\t: \"\";\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t/coff/\t\t&& do { if ($arg =~ /(\\w+),\\s*%function/) {\n\t\t\t\t    $ret = \".def\t$1;\\n\".\n\t\t\t\t\t   \".type\t32;\\n\".\n\t\t\t\t\t   \".endef\";\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n    }\n    return $ret;\n} if ($flavour !~ /linux|cheri/);\n\nmy $size = sub {\n    if ($in_proc && $flavour =~ /win/) {\n\t$in_proc = undef;\n\treturn \"\\tENDP\";\n    }\n} if ($flavour !~ /linux|cheri/);\n\nmy $inst = sub {\n    if ($flavour =~ /win/)\t{ \"\\tDCDU\\t\".join(',',@_); }\n    else\t\t\t{ \".long\\t\".join(',',@_);  }\n} if ($flavour !~ /linux|cheri/);\n\nmy $asciz = sub {\n    my $line = join(\",\",@_);\n    if ($line =~ /^\"(.*)\"$/)\n    {\tif ($flavour =~ /win/) {\n\t    \"\\tDCB\\t$line,0\\n\\tALIGN\\t4\";\n\t} else {\n\t    \".byte\t\" . join(\",\",unpack(\"C*\",$1),0) . \"\\n.align\t2\";\n\t}\n    } else {\t\"\";\t}\n};\n\nmy $align = sub {\n    \"\\tALIGN\\t\".2**@_[0];\n} if ($flavour =~ /win/);\n   $align = sub {\n    \".p2align\\t\".@_[0];\n} if ($flavour =~ /coff/);\n\nmy $byte = sub {\n    \"\\tDCB\\t\".join(',',@_);\n} if ($flavour =~ /win/);\n\nmy $short = sub {\n    \"\\tDCWU\\t\".join(',',@_);\n} if ($flavour =~ /win/);\n\nmy $word = sub {\n    \"\\tDCDU\\t\".join(',',@_);\n} if ($flavour =~ /win/);\n\nmy $long = $word if ($flavour =~ /win/);\n\nmy $quad = sub {\n    \"\\tDCQU\\t\".join(',',@_);\n} if ($flavour =~ /win/);\n\nmy $skip = sub {\n    \"\\tSPACE\\t\".shift;\n} if ($flavour =~ /win/);\n\nmy $code = sub {\n    \"\\tCODE@_[0]\";\n} if ($flavour =~ /win/);\n\nmy $thumb = sub {\t# .thumb should appear prior .text in source\n    \"# define ARM THUMB\\n\" .\n    \"\\tTHUMB\";\n} if ($flavour =~ /win/);\n\nmy $text = sub {\n    \"\\tAREA\\t|.text|,CODE,ALIGN=8,\".($flavour =~ /64/ ? 
\"ARM64\" : \"ARM\");\n} if ($flavour =~ /win/);\n\nmy $syntax = sub {} if ($flavour =~ /win/);\t# omit .syntax\n\nmy $rva = sub {\n    # .rva directive comes in handy only on 32-bit Windows, i.e. it can\n    # be used only in '#if defined(_WIN32) && !defined(_WIN64)' sections.\n    # However! Corresponding compilers don't seem to bet on PIC, which\n    # raises the question why would assembler programmer have to jump\n    # through the hoops? But just in case, it would go as following:\n    #\n    #\tldr\tr1,.LOPENSSL_armcap\n    #\tldr\tr2,.LOPENSSL_armcap+4\n    #\tadr\tr0,.LOPENSSL_armcap\n    #\tbic\tr1,r1,#1\t\t; de-thumb-ify link.exe's ideas\n    #\tsub\tr0,r0,r1\t\t; r0 is image base now\n    #\tldr\tr0,[r0,r2]\n    #\t...\n    #.LOPENSSL_armcap:\n    #\t.rva\t.LOPENSSL_armcap\t; self-reference\n    #\t.rva\tOPENSSL_armcap_P\t; real target\n    #\n    # Non-position-independent [and ISA-neutral] alternative is so much\n    # simpler:\n    #\n    #\tldr\tr0,.LOPENSSL_armcap\n    #\tldr\tr0,[r0]\n    #\t...\n    #.LOPENSSL_armcap:\n    #\t.long\tOPENSSL_armcap_P\n    #\n    \"\\tDCDU\\t@_[0]\\n\\tRELOC\\t2\"\n} if ($flavour =~ /win(?!64)/);\n\n################################################################\n# some broken instructions in Visual Studio armasm[64]...\n\nmy $it = sub {} if ($flavour =~ /win32/);\t# omit 'it'\n\nmy $ext = sub {\n    \"\\text8\\t\".join(',',@_);\n} if ($flavour =~ /win64/);\n\nmy $csel = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    my @regs = split(m|,\\s*|,$args);\n    my $cond = pop(@regs);\n\n    \"\\tcsel$cond\\t\".join(',',@regs);\n} if ($flavour =~ /win64/);\n\nmy $csetm = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    my @regs = split(m|,\\s*|,$args);\n    my $cond = pop(@regs);\n\n    \"\\tcsetm$cond\\t\".join(',',@regs);\n} if ($flavour =~ /win64/);\n\n# ... 
then conditional branch instructions are also broken, but\n# maintaining all the variants is tedious, so I kludge-fix it\n# elsewhere...\n\n################################################################\n# CHERI-specific synthetic instructions\nmy $alignd = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    $args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    my @regs = split(m|,\\s*|,$args);\n\n    \"\\talignd\\t\".join(',',@regs);\n};\n\nmy $scvalue = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    $args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    my @regs = split(m|,\\s*|,$args);\n    @regs[2] =~ s/\\bc([0-9])\\b/x$1/;\n\n    \"\\tscvalue\\t\".join(',',@regs);\n};\n\nmy $scbnds = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    $args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    my @regs = split(m|,\\s*|,$args);\n    @regs[2] =~ s/\\bc([0-9])\\b/x$1/;\n\n    \"\\tscbnds\\t\".join(',',@regs);\n};\n\nmy $cadd = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    if ($flavour =~ /cheri/) {\n\t$args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    } else {\n\t$args =~ s/\\bc([0-9]+)\\b/x$1/g;\n    }\n    my @regs = split(m|,\\s*|,$args);\n    @regs[2] =~ s/c([0-9])/x$1/;\n\n    \"\\tadd\\t\".join(',',@regs);\n};\n\nmy $csub = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    if ($flavour =~ /cheri/) {\n\t$args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    } else {\n\t$args =~ s/\\bc([0-9]+)\\b/x$1/g;\n    }\n    my @regs = split(m|,\\s*|,$args);\n    @regs[2] =~ s/c([0-9])/x$1/;\n\n    \"\\tsub\\t\".join(',',@regs);\n};\n\nmy $cmov = sub {\n    my $args = shift;\n    if ($flavour =~ /cheri/) {\n\t$args =~ s/\\b(?:x([0-9]+)|(sp))\\b/c$1$2/g;\n    } else {\n\t$args =~ s/\\bc([0-9]+)\\b/x$1/g;\n    }\n\n    \"\\tmov\\t\".$args;\n};\n\nmy $adr = sub {\n    my $args = shift;\n    $args =~ s/\\bx([0-9]+)\\b/c$1/g;\n\n    \"\\tadr\\t\".$args;\n} if ($flavour =~ 
/cheri/);\n\n################################################################\nmy $adrp = sub {\n    my ($args,$comment) = split(m|\\s*//|,shift);\n    \"\\tadrp\\t$args\\@PAGE\";\n} if ($flavour =~ /ios64/);\n\nmy $paciasp = sub {\n    ($flavour =~ /linux|cheri/) ? \"\\thint\\t#PACI_HINT\"\n                                : \"\\thint\\t#25\";\n};\n\nmy $autiasp = sub {\n    ($flavour =~ /linux|cheri/) ? \"\\thint\\t#AUTI_HINT\"\n                                : \"\\thint\\t#29\";\n};\n\nsub range {\n  my ($r,$sfx,$start,$end) = @_;\n\n    join(\",\",map(\"$r$_$sfx\",($start..$end)));\n}\n\nsub expand_line {\n  my $line = shift;\n  my @ret = ();\n\n    pos($line)=0;\n\n    while ($line =~ m/\\G[^@\\/\\{\\\"]*/g) {\n\tif ($line =~ m/\\G(@|\\/\\/|$)/gc) {\n\t    last;\n\t}\n\telsif ($line =~ m/\\G\\{/gc) {\n\t    my $saved_pos = pos($line);\n\t    $line =~ s/\\G([rdqv])([0-9]+)([^\\-]*)\\-\\1([0-9]+)\\3/range($1,$3,$2,$4)/e;\n\t    pos($line) = $saved_pos;\n\t    $line =~ m/\\G[^\\}]*\\}/g;\n\t}\n\telsif ($line =~ m/\\G\\\"/gc) {\n\t    $line =~ m/\\G[^\\\"]*\\\"/g;\n\t}\n    }\n\n    $line =~ s/\\b(\\w+)/$GLOBALS{$1} or $1/ge;\n\n    if ($flavour =~ /cheri/) {\n\t$line =~ s/\\[\\s*(?:x([0-9]+)|(sp))\\s*(,?.*)\\]/[c$1$2$3]/;\n    } else {\n\t$line =~ s/\\bc([0-9]+)\\b/x$1/g;\n\t$line =~ s/\\bcsp\\b/sp/g;\n    }\n\n    if ($flavour =~ /win/) {\n\t# adjust alignment hints, \"[rN,:32]\" -> \"[rN@32]\"\n\t$line =~ s/(\\[\\s*(?:r[0-9]+|sp))\\s*,?\\s*:([0-9]+\\s*\\])/$1\\@$2/;\n\t# adjust local labels, \".Lwhatever\" -> \"|$Lwhatever|\"\n\t$line =~ s/\\.(L\\w{2,})/|\\$$1|/g;\n\t# omit \"#:lo12:\" on win64\n\t$line =~ s/#:lo12://;\n    } elsif ($flavour =~ /coff(?!64)/) {\n\t$line =~ s/\\.L(\\w{2,})/(\\$ML$1)/g;\n    } elsif ($flavour =~ /ios64/) {\n\t$line =~ s/#:lo12:(\\w+)/$1\\@PAGEOFF/;\n    }\n\n    if ($flavour =~ /64/) {\n\t# \"vX.Md[N]\" -> \"vX.d[N]\"\n\t$line =~ s/\\b(v[0-9]+)\\.[1-9]+([bhsd]\\[[0-9]+\\])/$1.$2/;\n    }\n\n    return $line;\n}\n\nif ($flavour =~ 
/win(32|64)/) {\n    print<<___;\n GBLA __SIZEOF_POINTER__\n__SIZEOF_POINTER__ SETA $1/8\n___\n} elsif ($flavour =~ /linux|cheri/) {\n    print<<___;\n#if defined(__ARM_FEATURE_PAC_DEFAULT) && __ARM_FEATURE_PAC_DEFAULT==2\n# define PACI_HINT 27\n# define AUTI_HINT 31\n#else\n# define PACI_HINT 25\n# define AUTI_HINT 29\n#endif\n\n___\n}\n\nwhile(my $line=<>) {\n\n    if ($flavour =~ /win/) {\n\tif ($line =~ m/^#\\s*(ifdef|ifndef|else|endif)\\b(.*)/) {\n\t    my ($op, $arg) = ($1, $2);\n\t    $op = \"if :def:\"\t\tif ($op eq \"ifdef\");\n\t    $op = \"if :lnot::def:\"\tif ($op eq \"ifndef\");\n\t    print \" \".$op.$arg.\"\\n\";\n\t    next;\n\t}\n\t$line =~ s|//.*||;\n    }\n\n    # fix up assembler-specific commentary delimiter\n    $line =~ s/@(?=[\\s@])/\\;/g if ($flavour =~ /win|coff/);\n\n    if ($line =~ m/^\\s*(#|@|;|\\/\\/)/)\t{ print $line; next; }\n\n    $line =~ s|/\\*.*\\*/||;\t# get rid of C-style comments...\n    $line =~ s|^\\s+||;\t\t# ... and skip white spaces in beginning...\n    $line =~ s|\\s+$||;\t\t# ... and at the end\n\n    {\n\t$line =~ s|[\\b\\.]L(\\w{2,})|L$1|g;\t# common denominator for Locallabel\n\t$line =~ s|\\bL(\\w{2,})|\\.L$1|g\tif ($dotinlocallabels);\n    }\n\n    {\n\t$line =~ s|(^[\\.\\w]+)\\:\\s*||;\n\tmy $label = $1;\n\tif ($label) {\n\t    $label = ($GLOBALS{$label} or $label);\n\t    if ($flavour =~ /win/) {\n\t\t$label =~ s|^\\.L(?=\\w)|\\$L|;\n\t\tprintf \"|%s|%s\", $label, ($label eq $in_proc ? 
\" PROC\" : \"\");\n\t    } else {\n\t\t$label =~ s|^\\.L(?=\\w)|\\$ML| if ($flavour =~ /coff(?!64)/);\n\t\tprintf \"%s:\", $label;\n\t    }\n\t}\n    }\n\n    if ($line !~ m/^[#@;]/) {\n\t$line =~ s|^\\s*(\\.?)(\\S+)\\s*||;\n\tmy $c = $1; $c = \"\\t\" if ($c eq \"\");\n\tmy $mnemonic = $2;\n\tmy $opcode;\n\tif ($mnemonic =~ m/([^\\.]+)\\.([^\\.]+)/) {\n\t    $opcode = eval(\"\\$$1_$2\");\n\t} else {\n\t    $opcode = eval(\"\\$$mnemonic\");\n\t}\n\n\tmy $arg=expand_line($line);\n\n\tif (ref($opcode) eq 'CODE') {\n\t    $line = &$opcode($arg);\n\t} elsif ($mnemonic)         {\n\t    if ($flavour =~ /win64/) {\n\t\t# \"b.cond\" -> \"bcond\", kludge-fix:-(\n\t\t$mnemonic =~ s/^b\\.([a-z]{2}$)/b$1/;\n\t    }\n\t    $line = $c.$mnemonic;\n\t    $line.= \"\\t$arg\" if ($arg ne \"\");\n\t}\n    }\n\n    print $line if ($line);\n    print \"\\n\";\n}\n\nif ($flavour =~ /win/) {\n    print \"\\tEND\\n\";\n} elsif ($flavour =~ /linux|cheri/) {\n    # -mbranch-protection=standard segment, snatched from compiler -S output\n    print <<___;\n\n#if defined(__ARM_FEATURE_BTI_DEFAULT) || defined(__ARM_FEATURE_PAC_DEFAULT)\n.section\t.note.GNU-stack,\"\",\\@progbits\n.section\t.note.gnu.property,\"a\",\\@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000000,4,3\n.align  3\n2:\n#endif\n___\n}\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ct_inverse_mod_256-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast Euclidean inversion as suggested in\n# https://eprint.iacr.org/2020/972. ~4.600 cycles on Apple M1, ~8.900 -\n# on Cortex-A57.\n#\n# void ct_inverse_mod_256(vec512 ret, const vec256 inp, const vec256 mod,\n#                                                       const vec256 modx);\n#\n$python_ref.=<<'___';\ndef ct_inverse_mod_256(inp, mod):\n    a, u = inp, 1\n    b, v = mod, 0\n\n    k = 31\n    mask = (1 << k) - 1\n\n    for i in range(0, 512 // k - 1):\n        # __ab_approximation_31\n        n = max(a.bit_length(), b.bit_length())\n        if n < 64:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-k-2)) << k)\n            b_ = (b & mask) | ((b >> (n-k-2)) << k)\n\n        # __inner_loop_31\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n\n        # __smul_256_n_shift_by_31\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if a < 0:\n            a, f0, g0 = -a, -f0, -g0\n        if b < 0:\n            b, f1, g1 = -b, -f1, -g1\n\n        # __smul_512x63\n        u, v = u*f0 + v*g0, u*f1 + v*g1\n\n    if 512 % k + k:\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, 512 % k + k):\n            if a & 1:\n                if a < b:\n                    a, b, f0, g0, f1, g1 = b, a, f1, g1, f0, g0\n                a, f0, g0 = a-b, f0-f1, g0-g1\n            a, f1, g1 = a >> 1, f1 << 1, g1 << 1\n\n        v = u*f1 + v*g1\n\n    mod <<= 512 - mod.bit_length()  # align to the left\n    if v < 0:\n        v += mod\n    if v < 0:\n        v += mod\n    elif 
v == 1<<512:\n        v -= mod\n\n    return v & (2**512 - 1) # to be reduced % mod\n___\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\nmy ($out_ptr, $in_ptr, $n_ptr, $nx_ptr) = map(\"x$_\", (0..3));\nmy @acc=map(\"x$_\",(4..11));\nmy ($f0, $g0, $f1, $g1, $f_, $g_) = map(\"x$_\",(12..17));\nmy $cnt = $n_ptr;\nmy @t = map(\"x$_\",(19..26));\nmy ($a_lo, $b_lo) = @acc[3,7];\n\n$frame = 16+2*512;\n\n$code.=<<___;\n.text\n\n.globl\tct_inverse_mod_256\n.hidden\tct_inverse_mod_256\n.type\tct_inverse_mod_256, %function\n.align\t5\nct_inverse_mod_256:\n\tpaciasp\n\tstp\tc29, c30, [csp,#-10*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #$frame\n\n\tldp\t@acc[0], @acc[1], [$in_ptr,#8*0]\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tcadd\t$in_ptr, csp, #16+511\n\talignd\t$in_ptr, $in_ptr, #9\n\tscbnds\t$in_ptr, $in_ptr, #512\n#else\n\tadd\t$in_ptr, sp, #16+511\t// find closest 512-byte-aligned spot\n\tand\t$in_ptr, $in_ptr, #-512\t// in the frame...\n#endif\n\tstr\tc0, [csp]\t\t// offload out_ptr\n\n\tldp\t@acc[4], @acc[5], [$n_ptr,#8*0]\n\tldp\t@acc[6], @acc[7], [$n_ptr,#8*2]\n\n\tstp\t@acc[0], @acc[1], [$in_ptr,#8*0]\t// copy input to |a|\n\tstp\t@acc[2], @acc[3], [$in_ptr,#8*2]\n\tstp\t@acc[4], @acc[5], [$in_ptr,#8*4]\t// copy modulus to |b|\n\tstp\t@acc[6], @acc[7], [$in_ptr,#8*6]\n\n\t////////////////////////////////////////// first 
iteration\n\tbl\t.Lab_approximation_31_256_loaded\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tstr\t$f0,[$out_ptr,#8*8]\t\t// initialize |u| with |f0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*4\t// pointer to dst |b|\n\tbl\t__smul_256_n_shift_by_31\n\tstr\t$f0, [$out_ptr,#8*10]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\t$f_, $f0\t\t\t// corrected |f0|\n\tmov\t$g_, $g0\t\t\t// corrected |g0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*4\t// pointer to destination |b|\n\tbl\t__smul_256_n_shift_by_31\n\n\tldr\t@acc[4], [$in_ptr,#8*8]\t\t// |u|\n\tldr\t@acc[5], [$in_ptr,#8*14]\t// |v|\n\tmadd\t@acc[0], $f_, @acc[4], xzr\t// |u|*|f0|\n\tmadd\t@acc[0], $g_, @acc[5], @acc[0]\t// |v|*|g0|\n\tasr\t@acc[1], @acc[0], #63\t\t// sign extension\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*4]\n\tstp\t@acc[1], @acc[1], [$out_ptr,#8*6]\n\n\tmadd\t@acc[0], $f0, @acc[4], xzr\t// |u|*|f1|\n\tmadd\t@acc[0], $g0, @acc[5], @acc[0]\t// |v|*|g1|\n\tasr\t@acc[1], @acc[0], #63\t\t// sign extension\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*10]\n\tstp\t@acc[1], @acc[1], [$out_ptr,#8*12]\n___\nfor($i=2; $i<15; $i++) {\n$code.=<<___;\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tbl\t__ab_approximation_31_256\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst 
|a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_256_n_shift_by_31\n\tmov\t$f_, $f0\t\t\t// corrected |f0|\n\tmov\t$g_, $g0\t\t\t// corrected |g0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*4\t// pointer to destination |b|\n\tbl\t__smul_256_n_shift_by_31\n\n\tcadd\t$out_ptr, $out_ptr, #8*4\t// pointer to destination |u|\n\tbl\t__smul_256x63\n___\n$code.=<<___\tif ($i==7);\n\tasr\t@t[5], @t[5], #63\n\tstr\t@t[5], [$out_ptr,#8*4]\n___\n$code.=<<___\tif ($i>7);\n\tadc\t@t[3], @t[3], @t[4]\n\tstr\t@t[3], [$out_ptr,#8*4]\n___\n$code.=<<___;\n\tmov\t$f_, $f0\t\t\t// corrected |f1|\n\tmov\t$g_, $g0\t\t\t// corrected |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to destination |v|\n\tbl\t__smul_256x63\n___\n$code.=<<___\tif ($i>7);\n\tbl\t__smul_512x63_tail\n___\n$code.=<<___\tif ($i==7);\n\tasr\t@t[5], @t[5], #63\t\t// sign extension\n\tstp\t@t[5], @t[5], [$out_ptr,#8*4]\n\tstp\t@t[5], @t[5], [$out_ptr,#8*6]\n___\n}\n$code.=<<___;\n\t////////////////////////////////////////// two[!] 
last iterations\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tmov\t$cnt, #47\t\t\t// 31 + 512 % 31\n\t//bl\t__ab_approximation_62_256\t// |a| and |b| are exact,\n\tldr\t$a_lo, [$in_ptr,#8*0]\t\t// just load\n\tldr\t$b_lo, [$in_ptr,#8*4]\n\tbl\t__inner_loop_62_256\n\n\tmov\t$f_, $f1\n\tmov\t$g_, $g1\n\tldr\tc0, [csp]\t\t\t// original out_ptr\n\tbl\t__smul_256x63\n\tbl\t__smul_512x63_tail\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tsmulh\t@t[1], @acc[3], $g_\t\t// figure out top-most limb\n\tldp\t@acc[4], @acc[5], [$nx_ptr,#8*0]\n\tadc\t@t[4], @t[4], @t[6]\n\tldp\t@acc[6], @acc[7], [$nx_ptr,#8*2]\n\n\tadd\t@t[1], @t[1], @t[4]\t\t// @t[1] is 1, 0 or -1\n\tasr\t@t[0], @t[1], #63\t\t// sign as mask\n\n\tand\t@t[4],   @acc[4], @t[0]\t\t// add mod<<256 conditionally\n\tand\t@t[5],   @acc[5], @t[0]\n\tadds\t@acc[0], @acc[0], @t[4]\n\tand\t@t[6],   @acc[6], @t[0]\n\tadcs\t@acc[1], @acc[1], @t[5]\n\tand\t@t[7],   @acc[7], @t[0]\n\tadcs\t@acc[2], @acc[2], @t[6]\n\tadcs\t@acc[3], @t[3],   @t[7]\n\tadc\t@t[1], @t[1], xzr\t\t// @t[1] is 1, 0 or -1\n\n\tneg\t@t[0], @t[1]\n\torr\t@t[1], @t[1], @t[0]\t\t// excess bit or sign as mask\n\tasr\t@t[0], @t[0], #63\t\t// excess bit as mask\n\n\tand\t@acc[4], @acc[4], @t[1]\t\t// mask |mod|\n\tand\t@acc[5], @acc[5], @t[1]\n\tand\t@acc[6], @acc[6], @t[1]\n\tand\t@acc[7], @acc[7], @t[1]\n\n\teor\t@acc[4], @acc[4], @t[0]\t\t// conditionally negate |mod|\n\teor\t@acc[5], @acc[5], @t[0]\n\tadds\t@acc[4], @acc[4], @t[0], lsr#63\n\teor\t@acc[6], @acc[6], @t[0]\n\tadcs\t@acc[5], @acc[5], xzr\n\teor\t@acc[7], @acc[7], @t[0]\n\tadcs\t@acc[6], @acc[6], xzr\n\tadc\t@acc[7], @acc[7], xzr\n\n\tadds\t@acc[0], @acc[0], @acc[4]\t// final adjustment for |mod|<<256\n\tadcs\t@acc[1], @acc[1], @acc[5]\n\tadcs\t@acc[2], @acc[2], @acc[6]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*4]\n\tadc\t@acc[3], @acc[3], @acc[7]\n\tstp\t@acc[2], @acc[3], 
[$out_ptr,#8*6]\n\n\tadd\tcsp, csp, #$frame\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldr\tc29, [csp],#10*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tct_inverse_mod_256,.-ct_inverse_mod_256\n\n////////////////////////////////////////////////////////////////////////\n.type\t__smul_256x63, %function\n.align\t5\n__smul_256x63:\n___\nfor($j=0; $j<2; $j++) {\nmy $f_ = $f_;   $f_ = $g_          if ($j);\nmy @acc = @acc; @acc = @acc[4..7]  if ($j);\nmy $k = 8*8+8*6*$j;\n$code.=<<___;\n\tldp\t@acc[0], @acc[1], [$in_ptr,#8*0+$k]\t// load |u| (or |v|)\n\tasr\t$f1, $f_, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2+$k]\n\teor\t$f_, $f_, $f1\t\t// conditionally negate |f_| (or |g_|)\n\tldr\t@t[3+$j], [$in_ptr,#8*4+$k]\n\n\teor\t@acc[0], @acc[0], $f1\t// conditionally negate |u| (or |v|)\n\tsub\t$f_, $f_, $f1\n\teor\t@acc[1], @acc[1], $f1\n\tadds\t@acc[0], @acc[0], $f1, lsr#63\n\teor\t@acc[2], @acc[2], $f1\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], $f1\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@t[3+$j], @t[3+$j], $f1\n\t umulh\t@t[0], @acc[0], $f_\n\tadcs\t@acc[3], @acc[3], xzr\n\t umulh\t@t[1], @acc[1], $f_\n\tadcs\t@t[3+$j], @t[3+$j], xzr\n\t umulh\t@t[2], @acc[2], $f_\n___\n$code.=<<___\tif ($j!=0);\n\tadc\t$g1, xzr, xzr\t\t// used in __smul_512x63_tail\n___\n$code.=<<___;\n\tmul\t@acc[0], @acc[0], $f_\n\t cmp\t$f_, #0\n\tmul\t@acc[1], @acc[1], $f_\n\t csel\t@t[3+$j], @t[3+$j], xzr, ne\n\tmul\t@acc[2], @acc[2], $f_\n\tadds\t@acc[1], @acc[1], @t[0]\n\tmul\t@t[5+$j], @acc[3], $f_\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tadcs\t@t[5+$j], @t[5+$j], @t[2]\n___\n$code.=<<___\tif ($j==0);\n\tadc\t@t[7], xzr, xzr\n___\n}\n$code.=<<___;\n\tadc\t@t[7], @t[7], xzr\n\n\tadds\t@acc[0], @acc[0], @acc[4]\n\tadcs\t@acc[1], @acc[1], @acc[5]\n\tadcs\t@acc[2], @acc[2], @acc[6]\n\tstp\t@acc[0], @acc[1], 
[$out_ptr,#8*0]\n\tadcs\t@t[5],   @t[5],   @t[6]\n\tstp\t@acc[2], @t[5], [$out_ptr,#8*2]\n\n\tret\n.size\t__smul_256x63,.-__smul_256x63\n\n.type\t__smul_512x63_tail, %function\n.align\t5\n__smul_512x63_tail:\n\tumulh\t@t[5], @acc[3], $f_\n\tldr\t@acc[1], [$in_ptr,#8*19]\t// load rest of |v|\n\tadc\t@t[7], @t[7], xzr\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*20]\n\tand\t@t[3], @t[3], $f_\n\n\tumulh\t@acc[7], @acc[7], $g_\t// resume |v|*|g1| chain\n\n\tsub\t@t[5], @t[5], @t[3]\t// tie up |u|*|f1| chain\n\tasr\t@t[6], @t[5], #63\n\n\teor\t@acc[1], @acc[1], $f1\t// conditionally negate rest of |v|\n\teor\t@acc[2], @acc[2], $f1\n\tadds\t@acc[1], @acc[1], $g1\n\teor\t@acc[3], @acc[3], $f1\n\tadcs\t@acc[2], @acc[2], xzr\n\t umulh\t@t[0], @t[4],   $g_\n\tadc\t@acc[3], @acc[3], xzr\n\t umulh\t@t[1], @acc[1], $g_\n\tadd\t@acc[7], @acc[7], @t[7]\n\t umulh\t@t[2], @acc[2], $g_\n\n\tmul\t@acc[0], @t[4],   $g_\n\tmul\t@acc[1], @acc[1], $g_\n\tadds\t@acc[0], @acc[0], @acc[7]\n\tmul\t@acc[2], @acc[2], $g_\n\tadcs\t@acc[1], @acc[1], @t[0]\n\tmul\t@t[3],   @acc[3], $g_\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tadcs\t@t[3],   @t[3],   @t[2]\n\tadc\t@t[4], xzr, xzr\t\t// used in the final step\n\n\tadds\t@acc[0], @acc[0], @t[5]\n\tadcs\t@acc[1], @acc[1], @t[6]\n\tadcs\t@acc[2], @acc[2], @t[6]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*4]\n\tadcs\t@t[3],   @t[3],   @t[6]\t// carry is used in the final step\n\tstp\t@acc[2], @t[3],   [$out_ptr,#8*6]\n\n\tret\n.size\t__smul_512x63_tail,.-__smul_512x63_tail\n\n.type\t__smul_256_n_shift_by_31, %function\n.align\t5\n__smul_256_n_shift_by_31:\n___\nfor($j=0; $j<2; $j++) {\nmy $f0 = $f0;   $f0 = $g0           if ($j);\nmy @acc = @acc; @acc = @acc[4..7]   if ($j);\nmy $k = 8*4*$j;\n$code.=<<___;\n\tldp\t@acc[0], @acc[1], [$in_ptr,#8*0+$k]\t// load |a| (or |b|)\n\tasr\t@t[5], $f0, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2+$k]\n\teor\t@t[6], $f0, @t[5]\t// conditionally negate |f0| (or |g0|)\n\n\teor\t@acc[0], @acc[0], 
@t[5]\t// conditionally negate |a| (or |b|)\n\tsub\t@t[6], @t[6], @t[5]\n\teor\t@acc[1], @acc[1], @t[5]\n\tadds\t@acc[0], @acc[0], @t[5], lsr#63\n\teor\t@acc[2], @acc[2], @t[5]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[5]\n\t umulh\t@t[0], @acc[0], @t[6]\n\tadcs\t@acc[2], @acc[2], xzr\n\t umulh\t@t[1], @acc[1], @t[6]\n\tadc\t@acc[3], @acc[3], xzr\n\t umulh\t@t[2], @acc[2], @t[6]\n\tand\t@t[5], @t[5], @t[6]\n\t umulh\t@t[3+$j], @acc[3], @t[6]\n\tneg\t@t[5], @t[5]\n\n\tmul\t@acc[0], @acc[0], @t[6]\n\tmul\t@acc[1], @acc[1], @t[6]\n\tmul\t@acc[2], @acc[2], @t[6]\n\tadds\t@acc[1], @acc[1], @t[0]\n\tmul\t@acc[3], @acc[3], @t[6]\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tadcs\t@acc[3], @acc[3], @t[2]\n\tadc\t@t[3+$j], @t[3+$j], @t[5]\n___\n}\n$code.=<<___;\n\tadds\t@acc[0], @acc[0], @acc[4]\n\tadcs\t@acc[1], @acc[1], @acc[5]\n\tadcs\t@acc[2], @acc[2], @acc[6]\n\tadcs\t@acc[3], @acc[3], @acc[7]\n\tadc\t@acc[4], @t[3],   @t[4]\n\n\textr\t@acc[0], @acc[1], @acc[0], #31\n\textr\t@acc[1], @acc[2], @acc[1], #31\n\textr\t@acc[2], @acc[3], @acc[2], #31\n\tasr\t@t[4], @acc[4], #63\t// result's sign as mask\n\textr\t@acc[3], @acc[4], @acc[3], #31\n\n\teor\t@acc[0], @acc[0], @t[4]\t// ensure the result is positive\n\teor\t@acc[1], @acc[1], @t[4]\n\tadds\t@acc[0], @acc[0], @t[4], lsr#63\n\teor\t@acc[2], @acc[2], @t[4]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[4]\n\tadcs\t@acc[2], @acc[2], xzr\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*0]\n\tadc\t@acc[3], @acc[3], xzr\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*2]\n\n\teor\t$f0, $f0, @t[4]\t\t// adjust |f/g| accordingly\n\teor\t$g0, $g0, @t[4]\n\tsub\t$f0, $f0, @t[4]\n\tsub\t$g0, $g0, @t[4]\n\n\tret\n.size\t__smul_256_n_shift_by_31,.-__smul_256_n_shift_by_31\n___\n\n{\nmy @a = @acc[0..3];\nmy @b = @acc[4..7];\nmy ($fg0, $fg1, $bias) = ($g0, $g1, @t[4]);\n\n$code.=<<___;\n.type\t__ab_approximation_31_256, %function\n.align\t4\n__ab_approximation_31_256:\n\tldp\t@a[2], @a[3], [$in_ptr,#8*2]\n\tldp\t@b[2], @b[3], 
[$in_ptr,#8*6]\n\tldp\t@a[0], @a[1], [$in_ptr,#8*0]\n\tldp\t@b[0], @b[1], [$in_ptr,#8*4]\n\n.Lab_approximation_31_256_loaded:\n\torr\t@t[0], @a[3], @b[3]\t// check top-most limbs, ...\n\tcmp\t@t[0], #0\n\tcsel\t@a[3], @a[3], @a[2], ne\n\tcsel\t@b[3], @b[3], @b[2], ne\n\tcsel\t@a[2], @a[2], @a[1], ne\n\torr\t@t[0], @a[3], @b[3]\t// and ones before top-most, ...\n\tcsel\t@b[2], @b[2], @b[1], ne\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[3], @a[3], @a[2], ne\n\tcsel\t@b[3], @b[3], @b[2], ne\n\tcsel\t@a[2], @a[2], @a[0], ne\n\torr\t@t[0], @a[3], @b[3]\t// and one more, ...\n\tcsel\t@b[2], @b[2], @b[0], ne\n\n\tclz\t@t[0], @t[0]\n\tcmp\t@t[0], #64\n\tcsel\t@t[0], @t[0], xzr, ne\n\tcsel\t@a[3], @a[3], @a[2], ne\n\tcsel\t@b[3], @b[3], @b[2], ne\n\tneg\t@t[1], @t[0]\n\n\tlslv\t@a[3], @a[3], @t[0]\t// align high limbs to the left\n\tlslv\t@b[3], @b[3], @t[0]\n\tlsrv\t@a[2], @a[2], @t[1]\n\tlsrv\t@b[2], @b[2], @t[1]\n\tand\t@a[2], @a[2], @t[1], asr#6\n\tand\t@b[2], @b[2], @t[1], asr#6\n\torr\t$a_lo, @a[3], @a[2]\n\torr\t$b_lo, @b[3], @b[2]\n\n\tbfxil\t$a_lo, @a[0], #0, #31\n\tbfxil\t$b_lo, @b[0], #0, #31\n\n\tb\t__inner_loop_31_256\n\tret\n.size\t__ab_approximation_31_256,.-__ab_approximation_31_256\n\n.type\t__inner_loop_31_256, %function\n.align\t4\n__inner_loop_31_256:\n\tmov\t$cnt, #31\n\tmov\t$fg0, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\t$fg1, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\t$bias,#0x7FFFFFFF7FFFFFFF\n\n.Loop_31_256:\n\tsbfx\t@t[3], $a_lo, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\t$cnt, $cnt, #1\n\tand\t@t[0], $b_lo, @t[3]\n\tsub\t@t[1], $b_lo, $a_lo\t// |b_|-|a_|\n\tsubs\t@t[2], $a_lo, @t[0]\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\t@t[0], $fg1\n\tcsel\t$b_lo, $b_lo, $a_lo, hs\t// |b_| = |a_|\n\tcsel\t$a_lo, @t[2], @t[1], hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\t$fg1, $fg1, $fg0,    hs\t// exchange |fg0| and |fg1|\n\tcsel\t$fg0, $fg0, @t[0],   hs\n\tlsr\t$a_lo, $a_lo, #1\n\tand\t@t[0], $fg1, 
@t[3]\n\tand\t@t[1], $bias, @t[3]\n\tsub\t$fg0, $fg0, @t[0]\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tadd\t$fg1, $fg1, $fg1\t// |f1|<<=1\n\tadd\t$fg0, $fg0, @t[1]\n\tsub\t$fg1, $fg1, $bias\n\tcbnz\t$cnt, .Loop_31_256\n\n\tmov\t$bias, #0x7FFFFFFF\n\tubfx\t$f0, $fg0, #0, #32\n\tubfx\t$g0, $fg0, #32, #32\n\tubfx\t$f1, $fg1, #0, #32\n\tubfx\t$g1, $fg1, #32, #32\n\tsub\t$f0, $f0, $bias\t\t// remove bias\n\tsub\t$g0, $g0, $bias\n\tsub\t$f1, $f1, $bias\n\tsub\t$g1, $g1, $bias\n\n\tret\n.size\t__inner_loop_31_256,.-__inner_loop_31_256\n\n.type\t__inner_loop_62_256, %function\n.align\t4\n__inner_loop_62_256:\n\tmov\t$f0, #1\t\t// |f0|=1\n\tmov\t$g0, #0\t\t// |g0|=0\n\tmov\t$f1, #0\t\t// |f1|=0\n\tmov\t$g1, #1\t\t// |g1|=1\n\n.Loop_62_256:\n\tsbfx\t@t[3], $a_lo, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\t$cnt, $cnt, #1\n\tand\t@t[0], $b_lo, @t[3]\n\tsub\t@t[1], $b_lo, $a_lo\t// |b_|-|a_|\n\tsubs\t@t[2], $a_lo, @t[0]\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\t@t[0], $f0\n\tcsel\t$b_lo, $b_lo, $a_lo, hs\t// |b_| = |a_|\n\tcsel\t$a_lo, @t[2], @t[1], hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tmov\t@t[1], $g0\n\tcsel\t$f0, $f0, $f1,       hs\t// exchange |f0| and |f1|\n\tcsel\t$f1, $f1, @t[0],     hs\n\tcsel\t$g0, $g0, $g1,       hs\t// exchange |g0| and |g1|\n\tcsel\t$g1, $g1, @t[1],     hs\n\tlsr\t$a_lo, $a_lo, #1\n\tand\t@t[0], $f1, @t[3]\n\tand\t@t[1], $g1, @t[3]\n\tadd\t$f1, $f1, $f1\t\t// |f1|<<=1\n\tadd\t$g1, $g1, $g1\t\t// |g1|<<=1\n\tsub\t$f0, $f0, @t[0]\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\t$g0, $g0, @t[1]\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\t$cnt, .Loop_62_256\n\n\tret\n.size\t__inner_loop_62_256,.-__inner_loop_62_256\n___\n}\n\nforeach(split(\"\\n\",$code)) {\n    s/\\b(smaddl\\s+x[0-9]+,\\s)x([0-9]+,\\s+)x([0-9]+)/$1w$2w$3/;\n    print $_,\"\\n\";\n}\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ct_inverse_mod_256-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast Euclidean inversion as suggested in\n# https://eprint.iacr.org/2020/972. ~5.300 cycles on Coffee Lake.\n#\n# void ct_inverse_mod_256(vec512 ret, const vec256 inp, const vec256 mod,\n#                                                       const vec256 modx);\n#\n$python_ref.=<<'___';\ndef ct_inverse_mod_256(inp, mod):\n    a, u = inp, 1\n    b, v = mod, 0\n\n    k = 31\n    mask = (1 << k) - 1\n\n    for i in range(0, 512 // k - 1):\n        # __ab_approximation_31\n        n = max(a.bit_length(), b.bit_length())\n        if n < 64:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-k-2)) << k)\n            b_ = (b & mask) | ((b >> (n-k-2)) << k)\n\n        # __inner_loop_31\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n\n        # __smulq_256_n_shift_by_31\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if a < 0:\n            a, f0, g0 = -a, -f0, -g0\n        if b < 0:\n            b, f1, g1 = -b, -f1, -g1\n\n        # __smulq_512x63\n        u, v = u*f0 + v*g0, u*f1 + v*g1\n\n    if 512 % k + k:\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, 512 % k + k):\n            if a & 1:\n                if a < b:\n                    a, b, f0, g0, f1, g1 = b, a, f1, g1, f0, g0\n                a, f0, g0 = a-b, f0-f1, g0-g1\n            a, f1, g1 = a >> 1, f1 << 1, g1 << 1\n\n        v = u*f1 + v*g1\n\n    mod <<= 512 - mod.bit_length()  # align to the left\n    if v < 0:\n        v += mod\n    if v < 0:\n        v += mod\n    elif v == 1<<512:\n        
v -= mod\n\n    return v & (2**512 - 1) # to be reduced % mod\n___\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\nmy ($out_ptr, $in_ptr, $n_ptr, $nx_ptr) = (\"%rdi\", \"%rsi\", \"%rdx\", \"%rcx\");\nmy @acc = map(\"%r$_\",(8..15));\nmy ($f0, $g0, $f1, $g1) = (\"%rdx\",\"%rcx\",\"%r12\",\"%r13\");\nmy $cnt = \"%edx\";\n\n$frame = 8*6+2*512;\n\n$code.=<<___;\n.text\n\n.globl\tct_inverse_mod_256\n.hidden\tct_inverse_mod_256\n.type\tct_inverse_mod_256,\\@function,4,\"unwind\"\n.align\t32\nct_inverse_mod_256:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t8*6+511(%rsp), %rax\t# find closest 512-byte-aligned spot\n\tand\t\\$-512, %rax\t\t# in the frame...\n\tmov\t$out_ptr, 8*4(%rsp)\n\tmov\t$nx_ptr,  8*5(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($in_ptr), @acc[0]\t# load input\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\n\tmov\t8*0($n_ptr), @acc[4]\t# load modulus\n\tmov\t8*1($n_ptr), @acc[5]\n\tmov\t8*2($n_ptr), @acc[6]\n\tmov\t8*3($n_ptr), @acc[7]\n\n\tmov\t@acc[0], 8*0(%rax)\t# copy input to |a|\n\tmov\t@acc[1], 8*1(%rax)\n\tmov\t@acc[2], 8*2(%rax)\n\tmov\t@acc[3], 8*3(%rax)\n\n\tmov\t@acc[4], 8*4(%rax)\t# copy modulus to |b|\n\tmov\t@acc[5], 8*5(%rax)\n\tmov\t@acc[6], 8*6(%rax)\n\tmov\t@acc[7], 
8*7(%rax)\n\tmov\t%rax, $in_ptr\n\n\t################################# first iteration\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31_256\n\t#mov\t$f0, 8*0(%rsp)\n\t#mov\t$g0, 8*1(%rsp)\n\tmov\t$f1, 8*2(%rsp)\n\tmov\t$g1, 8*3(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_256_n_shift_by_31\n\t#mov\t$f0, 8*0(%rsp)\t\t# corrected |f0|\n\t#mov\t$g0, 8*1(%rsp)\t\t# corrected |g0|\n\tmov\t$f0, 8*8($out_ptr)\t# initialize |u| with |f0|\n\n\tmov\t8*2(%rsp), $f0\t\t# |f1|\n\tmov\t8*3(%rsp), $g0\t\t# |g1|\n\tlea\t8*4($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_256_n_shift_by_31\n\t#mov\t$f0, 8*2(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*3(%rsp)\t\t# corrected |g1|\n\tmov\t$f0, 8*9($out_ptr)\t# initialize |v| with |f1|\n\n\t################################# second iteration\n\txor\t\\$256, $in_ptr\t\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31_256\n\t#mov\t$f0, 8*0(%rsp)\n\t#mov\t$g0, 8*1(%rsp)\n\tmov\t$f1, 8*2(%rsp)\n\tmov\t$g1, 8*3(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\t$f0, 8*0(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*1(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*2(%rsp), $f0\t\t# |f1|\n\tmov\t8*3(%rsp), $g0\t\t# |g1|\n\tlea\t8*4($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_256_n_shift_by_31\n\t#mov\t$f0, 8*2(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*3(%rsp)\t\t# corrected |g1|\n\n\tmov\t8*8($in_ptr),  @acc[0]\t# |u|\n\tmov\t8*13($in_ptr), @acc[4]\t# |v|\n\tmov\t@acc[0], @acc[1]\n\timulq\t8*0(%rsp), @acc[0]\t# |u|*|f0|\n\tmov\t@acc[4], @acc[5]\n\timulq\t8*1(%rsp), @acc[4]\t# |v|*|g0|\n\tadd\t@acc[4], @acc[0]\n\tmov\t@acc[0], 8*4($out_ptr)\t# destination |u|\n\tsar\t\\$63, @acc[0]\t\t# sign extension\n\tmov\t@acc[0], 8*5($out_ptr)\n\tmov\t@acc[0], 8*6($out_ptr)\n\tmov\t@acc[0], 8*7($out_ptr)\n\tmov\t@acc[0], 
8*8($out_ptr)\n\tlea\t8*8($in_ptr), $in_ptr\t# make in_ptr \"rewindable\" with xor\n\n\timulq\t$f0, @acc[1]\t\t# |u|*|f1|\n\timulq\t$g0, @acc[5]\t\t# |v|*|g1|\n\tadd\t@acc[5], @acc[1]\n\tmov\t@acc[1], 8*9($out_ptr)\t# destination |v|\n\tsar\t\\$63, @acc[1]\t\t# sign extension\n\tmov\t@acc[1], 8*10($out_ptr)\n\tmov\t@acc[1], 8*11($out_ptr)\n\tmov\t@acc[1], 8*12($out_ptr)\n\tmov\t@acc[1], 8*13($out_ptr)\n___\nfor($i=2; $i<15; $i++) {\nmy $smul_512x63  = $i>8  ? \"__smulq_512x63\"\n                         : \"__smulq_256x63\";\n$code.=<<___;\n\txor\t\\$256+8*8, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31_256\n\t#mov\t$f0, 8*0(%rsp)\n\t#mov\t$g0, 8*1(%rsp)\n\tmov\t$f1, 8*2(%rsp)\n\tmov\t$g1, 8*3(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\t$f0, 8*0(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*1(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*2(%rsp), $f0\t\t# |f1|\n\tmov\t8*3(%rsp), $g0\t\t# |g1|\n\tlea\t8*4($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_256_n_shift_by_31\n\tmov\t$f0, 8*2(%rsp)\t\t# corrected |f1|\n\tmov\t$g0, 8*3(%rsp)\t\t# corrected |g1|\n\n\tmov\t8*0(%rsp), $f0\t\t# |f0|\n\tmov\t8*1(%rsp), $g0\t\t# |g0|\n\tlea\t8*8($in_ptr), $in_ptr\t# pointer to source |u|v|\n\tlea\t8*4($out_ptr), $out_ptr\t# pointer to destination |u|\n\tcall\t__smulq_256x63\n\n\tmov\t8*2(%rsp), $f0\t\t# |f1|\n\tmov\t8*3(%rsp), $g0\t\t# |g1|\n\tlea\t8*5($out_ptr),$out_ptr\t# pointer to destination |v|\n\tcall\t$smul_512x63\n___\n$code.=<<___\tif ($i==8);\n\tsar\t\\$63, %rbp\t\t# sign extension\n\tmov\t%rbp, 8*5($out_ptr)\n\tmov\t%rbp, 8*6($out_ptr)\n\tmov\t%rbp, 8*7($out_ptr)\n___\n}\n$code.=<<___;\n\t################################# two[!] 
last iterations in one go\n\txor\t\\$256+8*8, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$47, $cnt\t\t# 31 + 512 % 31\n\t#call\t__ab_approximation_31\t# |a| and |b| are exact, just load\n\tmov\t8*0($in_ptr), @acc[0]\t# |a_lo|\n\t#xor\t@acc[1],      @acc[1]\t# |a_hi|\n\tmov\t8*4($in_ptr), @acc[2]\t# |b_lo|\n\t#xor\t@acc[3],      @acc[3]\t# |b_hi|\n\tcall\t__inner_loop_62_256\n\t#mov\t$f0, 8*0(%rsp)\n\t#mov\t$g0, 8*1(%rsp)\n\t#mov\t$f1, 8*2(%rsp)\n\t#mov\t$g1, 8*3(%rsp)\n\n\t#mov\t8*0(%rsp), $f0\t\t# |f0|\n\t#mov\t8*1(%rsp), $g0\t\t# |g0|\n\tlea\t8*8($in_ptr), $in_ptr\t# pointer to source |u|v|\n\t#lea\t8*6($out_ptr), $out_ptr\t# pointer to destination |u|\n\t#call\t__smulq_256x63\n\n\t#mov\t8*2(%rsp), $f0\t\t# |f1|\n\t#mov\t8*3(%rsp), $g0\t\t# |g1|\n\tmov\t$f1, $f0\n\tmov\t$g1, $g0\n\tmov\t8*4(%rsp), $out_ptr\t# original |out_ptr|\n\tcall\t__smulq_512x63\n\tadc\t%rbp, %rdx\t\t# the excess limb of the result\n\n\tmov\t8*5(%rsp), $in_ptr\t# original |nx_ptr|\n\tmov\t%rdx, %rax\n\tsar\t\\$63, %rdx\t\t# result's sign as mask\n\n\tmov\t%rdx, @acc[0]\t\t# mask |modulus|\n\tmov\t%rdx, @acc[1]\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tand\t8*0($in_ptr), @acc[0]\n\tmov\t%rdx, @acc[2]\n\tand\t8*1($in_ptr), @acc[1]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), %rdx\n\n\tadd\t@acc[0], @acc[4]\t# conditionally add |modulus|<<256\n\tadc\t@acc[1], @acc[5]\n\tadc\t@acc[2], @acc[6]\n\tadc\t%rdx,    @acc[7]\n\tadc\t\\$0,     %rax\n\n\tmov\t%rax, %rdx\n\tneg\t%rax\n\tor\t%rax, %rdx\t\t# excess bit or sign as mask\n\tsar\t\\$63, %rax\t\t# excess bit as mask\n\n\tmov\t%rdx, @acc[0]\t\t# mask |modulus|\n\tmov\t%rdx, @acc[1]\n\tand\t8*0($in_ptr), @acc[0]\n\tmov\t%rdx, @acc[2]\n\tand\t8*1($in_ptr), @acc[1]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), %rdx\n\n\txor\t%rax, @acc[0]\t\t# conditionally negate |modulus|\n\txor\t%rcx, %rcx\n\txor\t%rax, @acc[1]\n\tsub\t%rax, %rcx\n\txor\t%rax, @acc[2]\n\txor\t%rax, %rdx\n\tadd\t%rcx, @acc[0]\n\tadc\t\\$0, 
@acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, %rdx\n\n\tadd\t@acc[0], @acc[4]\t# final adjustment for |modulus|<<256\n\tadc\t@acc[1], @acc[5]\n\tadc\t@acc[2], @acc[6]\n\tadc\t%rdx,    @acc[7]\n\n\tmov\t@acc[4], 8*4($out_ptr)\t# store absolute value\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tmov\t@acc[7], 8*7($out_ptr)\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tct_inverse_mod_256,.-ct_inverse_mod_256\n___\n########################################################################\n# Signed |u|*|f?|+|v|*|g?| subroutines. \"NNN\" in \"NNNx63\" suffix refers\n# to the maximum bit-length of the *result*, and \"63\" - to the maximum\n# bit-length of the |f?| and |g?| single-limb multiplicands. However!\n# The latter should not be taken literally, as they are always chosen so\n# that \"bad things\" don't happen. For example, there comes a point when\n# |v| grows beyond 256 bits, while |u| remains 256 bits wide. Yet, we\n# always call __smulq_256x63 to perform |u|*|f0|+|v|*|g0| step. This is\n# because past that point |f0| is always 1 and |g0| is always 0. 
And,\n# since |u| never grows beyond 256 bits, __smulq_512x63 doesn't have to\n# perform full-width |u|*|f1| multiplication, half-width one with sign\n# extension is sufficient...\n$code.=<<___;\n.type\t__smulq_512x63,\\@abi-omnipotent\n.align\t32\n__smulq_512x63:\n\tmov\t8*0($in_ptr), @acc[0]\t# load |u|\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), %rbp\t# sign limb\n\n\tmov\t$f0, %rbx\n\tsar\t\\$63, $f0\t\t# |f0|'s sign as mask\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |f0|'s sign as bit\n\n\txor\t$f0, %rbx\t\t# conditionally negate |f0|\n\tadd\t%rax, %rbx\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |u|\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\txor\t$f0, %rbp\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, %rbp\n\n\tmulq\t%rbx\t\t\t# |u|*|f0|\n\tmov\t%rax, 8*0($out_ptr)\t# offload |u|*|f0|\n\tmov\t@acc[1], %rax\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<3; $i++) {\n$code.=<<___;\n\tmulq\t%rbx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t@acc[$i], 8*$i($out_ptr)\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\tand\t%rbx, %rbp\n\tneg\t%rbp\n\tmulq\t%rbx\n\tadd\t%rax, @acc[3]\n\tadc\t%rdx, %rbp\n\tmov\t@acc[3], 8*3($out_ptr)\n\n\tmov\t8*5($in_ptr), @acc[0]\t# load |v|\n\tmov\t8*6($in_ptr), @acc[1]\n\tmov\t8*7($in_ptr), @acc[2]\n\tmov\t8*8($in_ptr), @acc[3]\n\tmov\t8*9($in_ptr), @acc[4]\n\tmov\t8*10($in_ptr), @acc[5]\n\tmov\t8*11($in_ptr), @acc[6]\n\tmov\t8*12($in_ptr), @acc[7]\n\n\tmov\t$g0, $f0\n\tsar\t\\$63, $f0\t\t# |g0|'s sign as mask\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |g0|'s sign as bit\n\n\txor\t$f0, $g0\t\t# conditionally negate |g0|\n\tadd\t%rax, $g0\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |v|\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\txor\t$f0, @acc[4]\n\txor\t$f0, @acc[5]\n\txor\t$f0, @acc[6]\n\txor\t$f0, @acc[7]\n\tadd\t@acc[0], 
%rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\tadc\t\\$0, @acc[7]\n\n\tmulq\t$g0\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<7; $i++) {\n$code.=<<___;\n\tmulq\t$g0\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\timulq\t$g0\n\tadd\t%rax, @acc[7]\n\tadc\t\\$0, %rdx\t\t# used in the final step\n\n\tmov\t%rbp, %rbx\n\tsar\t\\$63, %rbp\t\t# sign extension\n\n\tadd\t8*0($out_ptr), @acc[0]\t# accumulate |u|*|f0|\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t%rbx, @acc[4]\n\tadc\t%rbp, @acc[5]\n\tadc\t%rbp, @acc[6]\n\tadc\t%rbp, @acc[7]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tmov\t@acc[7], 8*7($out_ptr)\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulq_512x63,.-__smulq_512x63\n\n.type\t__smulq_256x63,\\@abi-omnipotent\n.align\t32\n__smulq_256x63:\n___\nfor($j=0; $j<2; $j++) {\nmy $k = 8*5*$j;\nmy @acc=@acc;\t@acc=@acc[4..7]\tif($j);\nmy $top=\"%rbp\";\t$top=$g0\tif($j);\n$code.=<<___;\n\tmov\t$k+8*0($in_ptr), @acc[0] # load |u| (or |v|)\n\tmov\t$k+8*1($in_ptr), @acc[1]\n\tmov\t$k+8*2($in_ptr), @acc[2]\n\tmov\t$k+8*3($in_ptr), @acc[3]\n\tmov\t$k+8*4($in_ptr), $top\t# sign/excess limb\n\n\tmov\t$f0, %rbx\n\tsar\t\\$63, $f0\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t$f0, %rbx\t\t# conditionally negate |f0|\n\tadd\t%rax, %rbx\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |u| (or |v|)\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\txor\t$f0, $top\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, 
@acc[3]\n\tadc\t\\$0, $top\n\n\tmulq\t%rbx\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<3; $i++) {\n$code.=<<___;\n\tmulq\t%rbx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\tand\t%rbx, $top\n\tneg\t$top\n\tmulq\t%rbx\n\tadd\t%rax, @acc[3]\n\tadc\t%rdx, $top\n___\n$code.=<<___\tif ($j==0);\n\tmov\t$g0, $f0\n___\n}\n$code.=<<___;\n\tadd\t@acc[4], @acc[0]\t# accumulate |u|*|f0|\n\tadc\t@acc[5], @acc[1]\n\tadc\t@acc[6], @acc[2]\n\tadc\t@acc[7], @acc[3]\n\tadc\t%rcx, %rbp\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t%rbp,    8*4($out_ptr)\n\n\tret\n.size\t__smulq_256x63,.-__smulq_256x63\n___\n########################################################################\n# Signed abs(|a|*|f?|+|b|*|g?|)>>k subroutines. \"NNN\" in the middle of\n# the names refers to maximum bit-lengths of |a| and |b|. As already\n# mentioned, |f?| and |g?| can be viewed as 63 bits wide, but are always\n# chosen so that \"bad things\" don't happen. 
For example, so that the\n# sum of the products doesn't overflow, and that the final result is\n# never wider than inputs...\n{\n$code.=<<___;\n.type\t__smulq_256_n_shift_by_31,\\@abi-omnipotent\n.align\t32\n__smulq_256_n_shift_by_31:\n\tmov\t$f0, 8*0($out_ptr)\t# offload |f0|\n\tmov\t$g0, 8*1($out_ptr)\t# offload |g0|\n\tmov\t$f0, %rbp\n___\nfor($j=0; $j<2; $j++) {\nmy $k = 8*4*$j;\nmy @acc=@acc;\t@acc=@acc[4..7] if ($j);\nmy $f0=\"%rbp\";\t$f0=$g0\t\tif ($j);\n$code.=<<___;\n\tmov\t$k+8*0($in_ptr), @acc[0] # load |a| (or |b|)\n\tmov\t$k+8*1($in_ptr), @acc[1]\n\tmov\t$k+8*2($in_ptr), @acc[2]\n\tmov\t$k+8*3($in_ptr), @acc[3]\n\n\tmov\t$f0, %rbx\n\tsar\t\\$63, $f0\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t$f0, %rbx\t\t# conditionally negate |f0| (or |g0|)\n\tadd\t%rax, %rbx\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |a| (or |b|)\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\n\tmulq\t%rbx\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tand\t%rbx, $f0\n\tneg\t$f0\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<3; $i++) {\n$code.=<<___;\n\tmulq\t%rbx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\tmulq\t%rbx\n\tadd\t%rax, @acc[3]\n\tadc\t%rdx, $f0\n___\n}\n$code.=<<___;\n\tadd\t@acc[4], @acc[0]\n\tadc\t@acc[5], @acc[1]\n\tadc\t@acc[6], @acc[2]\n\tadc\t@acc[7], @acc[3]\n\tadc\t$g0, %rbp\n\n\tmov\t8*0($out_ptr), $f0\t# restore original |f0|\n\tmov\t8*1($out_ptr), $g0\t# restore original |g0|\n\n\tshrd\t\\$31, @acc[1], @acc[0]\n\tshrd\t\\$31, @acc[2], @acc[1]\n\tshrd\t\\$31, @acc[3], @acc[2]\n\tshrd\t\\$31, %rbp,    @acc[3]\n\n\tsar\t\\$63, %rbp\t\t# sign as mask\n\txor\t%rax, %rax\n\tsub\t%rbp, %rax\t\t# sign as bit\n\n\txor\t%rbp, @acc[0]\t\t# conditionally negate the result\n\txor\t%rbp, @acc[1]\n\txor\t%rbp, 
@acc[2]\n\txor\t%rbp, @acc[3]\n\tadd\t%rax, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\n\txor\t%rbp, $f0\t\t# conditionally negate |f0|\n\txor\t%rbp, $g0\t\t# conditionally negate |g0|\n\tadd\t%rax, $f0\n\tadd\t%rax, $g0\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulq_256_n_shift_by_31,.-__smulq_256_n_shift_by_31\n___\n}\n\n{\nmy ($a_lo, $a_hi, $b_lo, $b_hi) = map(\"%r$_\",(8..11));\nmy ($t0, $t1, $t2, $t3, $t4) = (\"%rax\",\"%rbx\",\"%rbp\",\"%r14\",\"%r15\");\nmy ($fg0, $fg1, $bias) = ($g0, $g1, $t4);\nmy ($a_, $b_) = ($a_lo, $b_lo);\n{\nmy @a = ($a_lo, $t1, $a_hi);\nmy @b = ($b_lo, $t2, $b_hi);\n\n$code.=<<___;\n.type\t__ab_approximation_31_256,\\@abi-omnipotent\n.align\t32\n__ab_approximation_31_256:\n\tmov\t8*3($in_ptr), @a[2]\t# load |a| in reverse order\n\tmov\t8*7($in_ptr), @b[2]\t# load |b| in reverse order\n\tmov\t8*2($in_ptr), @a[1]\n\tmov\t8*6($in_ptr), @b[1]\n\tmov\t8*1($in_ptr), @a[0]\n\tmov\t8*5($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# check top-most limbs, ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tmov\t8*0($in_ptr), @a[0]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*4($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# ... 
and ones before that ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tcmovz\t@b[0], @b[1]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\n\tbsr\t$t0, %rcx\n\tlea\t1(%rcx), %rcx\n\tcmovz\t@a[0], @a[2]\n\tcmovz\t@b[0], @b[2]\n\tcmovz\t$t0, %rcx\n\tneg\t%rcx\n\t#and\t\\$63, %rcx\t\t# debugging artefact\n\n\tshldq\t%cl, @a[1], @a[2]\t# align second limb to the left\n\tshldq\t%cl, @b[1], @b[2]\n\n\tmov\t\\$0x7FFFFFFF, %eax\n\tand\t%rax, @a[0]\n\tand\t%rax, @b[0]\n\tnot\t%rax\n\tand\t%rax, @a[2]\n\tand\t%rax, @b[2]\n\tor\t@a[2], @a[0]\n\tor\t@b[2], @b[0]\n\n\tjmp\t__inner_loop_31_256\n\n\tret\n.size\t__ab_approximation_31_256,.-__ab_approximation_31_256\n___\n}\n$code.=<<___;\n.type\t__inner_loop_31_256,\\@abi-omnipotent\n.align\t32\t\t\t# comment and punish Coffee Lake by up to 40%\n__inner_loop_31_256:\t\t################# by Thomas Pornin\n\tmov\t\\$0x7FFFFFFF80000000, $fg0\t# |f0|=1, |g0|=0\n\tmov\t\\$0x800000007FFFFFFF, $fg1\t# |f1|=0, |g1|=1\n\tmov\t\\$0x7FFFFFFF7FFFFFFF, $bias\n\n.Loop_31_256:\n\tcmp\t$b_, $a_\t\t# if |a_|<|b_|, swap the variables\n\tmov\t$a_, $t0\n\tmov\t$b_, $t1\n\tmov\t$fg0, $t2\n\tmov\t$fg1, $t3\n\tcmovb\t$b_, $a_\n\tcmovb\t$t0, $b_\n\tcmovb\t$fg1, $fg0\n\tcmovb\t$t2, $fg1\n\n\tsub\t$b_, $a_\t\t# |a_|-|b_|\n\tsub\t$fg1, $fg0\t\t# |f0|-|f1|, |g0|-|g1|\n\tadd\t$bias, $fg0\n\n\ttest\t\\$1, $t0\t\t# if |a_| was even, roll back \n\tcmovz\t$t0, $a_\n\tcmovz\t$t1, $b_\n\tcmovz\t$t2, $fg0\n\tcmovz\t$t3, $fg1\n\n\tshr\t\\$1, $a_\t\t# |a_|>>=1\n\tadd\t$fg1, $fg1\t\t# |f1|<<=1, |g1|<<=1\n\tsub\t$bias, $fg1\n\tsub\t\\$1, $cnt\n\tjnz\t.Loop_31_256\n\n\tshr\t\\$32, $bias\n\tmov\t%ecx, %edx\t\t# $fg0, $f0\n\tmov\t${fg1}d, ${f1}d\n\tshr\t\\$32, $g0\n\tshr\t\\$32, $g1\n\tsub\t$bias, $f0\t\t# remove the bias\n\tsub\t$bias, $g0\n\tsub\t$bias, $f1\n\tsub\t$bias, $g1\n\n\tret\t# 
__SGX_LVI_HARDENING_CLOBBER__=$a_lo\n.size\t__inner_loop_31_256,.-__inner_loop_31_256\n\n.type\t__inner_loop_62_256,\\@abi-omnipotent\n.align\t32\n__inner_loop_62_256:\n\tmov\t$cnt, %r15d\n\tmov\t\\$1, $f0\t# |f0|=1\n\txor\t$g0, $g0\t# |g0|=0\n\txor\t$f1, $f1\t# |f1|=0\n\tmov\t$f0, $g1\t# |g1|=1\n\tmov\t$f0, %r14\n\n.Loop_62_256:\n\txor\t$t0, $t0\n\ttest\t%r14, $a_lo\t# if |a_| is odd, then we'll be subtracting |b_|\n\tmov\t$b_lo, $t1\n\tcmovnz\t$b_lo, $t0\n\tsub\t$a_lo, $t1\t# |b_|-|a_|\n\tmov\t$a_lo, $t2\n\tsub\t$t0, $a_lo\t# |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tcmovc\t$t1, $a_lo\t# borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcmovc\t$t2, $b_lo\t# |b_| = |a_|\n\tmov\t$f0, $t0\t# exchange |f0| and |f1|\n\tcmovc\t$f1, $f0\n\tcmovc\t$t0, $f1\n\tmov\t$g0, $t1\t# exchange |g0| and |g1|\n\tcmovc\t$g1, $g0\n\tcmovc\t$t1, $g1\n\txor\t$t0, $t0\n\txor\t$t1, $t1\n\tshr\t\\$1, $a_lo\n\ttest\t%r14, $t2\t# if |a_| was odd, then we'll be subtracting...\n\tcmovnz\t$f1, $t0\n\tcmovnz\t$g1, $t1\n\tadd\t$f1, $f1\t# |f1|<<=1\n\tadd\t$g1, $g1\t# |g1|<<=1\n\tsub\t$t0, $f0\t# |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\t$t1, $g0\t# |g0|-=|g1| (or |g0-=0| ...)\n\tsub\t\\$1, %r15d\n\tjnz\t.Loop_62_256\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=$a_lo\n.size\t__inner_loop_62_256,.-__inner_loop_62_256\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ct_inverse_mod_384-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast Euclidean inversion as suggested in\n# https://eprint.iacr.org/2020/972. Performance is >12x better [on\n# Cortex cores] than modulus-specific FLT addition chain...\n#\n# void ct_inverse_mod_384(vec768 ret, const vec384 inp, const vec384 mod);\n#\n$python_ref.=<<'___';\ndef ct_inverse_mod_384(inp, mod):\n    a, u = inp, 1\n    b, v = mod, 0\n\n    k = 62\n    w = 64\n    mask = (1 << w) - 1\n\n    for i in range(0, 768 // k):\n        # __ab_approximation_62\n        n = max(a.bit_length(), b.bit_length())\n        if n < 128:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-w)) << w)\n            b_ = (b & mask) | ((b >> (n-w)) << w)\n\n        # __inner_loop_62\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n\n        # __smul_384_n_shift_by_62\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if a < 0:\n            a, f0, g0 = -a, -f0, -g0\n        if b < 0:\n            b, f1, g1 = -b, -f1, -g1\n\n        # __smul_768x63\n        u, v = u*f0 + v*g0, u*f1 + v*g1\n\n    if 768 % k:\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, 768 % k):\n            if a & 1:\n                if a < b:\n                    a, b, f0, g0, f1, g1 = b, a, f1, g1, f0, g0\n                a, f0, g0 = a-b, f0-f1, g0-g1\n            a, f1, g1 = a >> 1, f1 << 1, g1 << 1\n\n        v = u*f1 + v*g1\n\n    mod <<= 768 - mod.bit_length()  # align to the left\n    if v < 0:\n        v += mod\n    if v < 0:\n        v += mod\n    elif v == 1<<768:\n        v -= mod\n\n    
return v & (2**768 - 1) # to be reduced % mod\n___\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\nmy ($out_ptr, $in_ptr, $n_ptr, $nx_ptr) = map(\"x$_\", (0..3));\nmy @acc=map(\"x$_\",(3..14));\nmy ($f0, $g0, $f1, $g1, $f_, $g_) = map(\"x$_\",(15..17,19..21));\nmy $cnt = $n_ptr;\nmy @t = map(\"x$_\",(22..28,2));\nmy ($a_lo, $a_hi, $b_lo, $b_hi) = @acc[0,5,6,11];\n\n$frame = 32+2*512;\n\n$code.=<<___;\n.text\n\n.globl\tct_inverse_mod_384\n.hidden\tct_inverse_mod_384\n.type\tct_inverse_mod_384, %function\n.align\t5\nct_inverse_mod_384:\n\tpaciasp\n\tstp\tc29, c30, [csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27, c28, [csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #$frame\n\n\tldp\t@t[0],   @acc[1], [$in_ptr,#8*0]\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2]\n\tldp\t@acc[4], @acc[5], [$in_ptr,#8*4]\n\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tcadd\t$in_ptr, csp, #32+511\n\talignd\t$in_ptr, $in_ptr, #9\n\tscbnds\t$in_ptr, $in_ptr, #512\n#else\n\tadd\t$in_ptr, sp, #32+511\t// find closest 512-byte-aligned spot\n\tand\t$in_ptr, $in_ptr, #-512\t// in the frame...\n#endif\n\tstp\tc0, c3, [csp]\t\t// offload out_ptr, nx_ptr\n\n\tldp\t@acc[6], @acc[7], [$n_ptr,#8*0]\n\tldp\t@acc[8], @acc[9], [$n_ptr,#8*2]\n\tldp\t@acc[10], @acc[11], [$n_ptr,#8*4]\n\n\tstp\t@t[0],   @acc[1], [$in_ptr,#8*0]\t// copy input to |a|\n\tstp\t@acc[2], @acc[3], [$in_ptr,#8*2]\n\tstp\t@acc[4], @acc[5], [$in_ptr,#8*4]\n\tstp\t@acc[6], @acc[7], [$in_ptr,#8*6]\t// 
copy modulus to |b|\n\tstp\t@acc[8], @acc[9], [$in_ptr,#8*8]\n\tstp\t@acc[10], @acc[11], [$in_ptr,#8*10]\n\n\t////////////////////////////////////////// first iteration\n\tmov\t$cnt, #62\n\tbl\t.Lab_approximation_62_loaded\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tstr\t$f0,[$out_ptr,#8*12]\t\t// initialize |u| with |f0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to dst |b|\n\tbl\t__smul_384_n_shift_by_62\n\tstr\t$f0, [$out_ptr,#8*14]\t\t// initialize |v| with |f1|\n\n\t////////////////////////////////////////// second iteration\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tmov\t$cnt, #62\n\tbl\t__ab_approximation_62\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\t$f_, $f0\t\t\t// corrected |f0|\n\tmov\t$g_, $g0\t\t\t// corrected |g0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to destination |b|\n\tbl\t__smul_384_n_shift_by_62\n\n\tldr\t@acc[4], [$in_ptr,#8*12]\t// |u|\n\tldr\t@acc[5], [$in_ptr,#8*20]\t// |v|\n\tmul\t@acc[0], $f_, @acc[4]\t\t// |u|*|f0|\n\tsmulh\t@acc[1], $f_, @acc[4]\n\tmul\t@acc[2], $g_, @acc[5]\t\t// |v|*|g0|\n\tsmulh\t@acc[3], $g_, @acc[5]\n\tadds\t@acc[0], @acc[0], @acc[2]\n\tadc\t@acc[1], @acc[1], @acc[3]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*6]\n\tasr\t@acc[2], @acc[1], #63\t\t// sign extension\n\tstp\t@acc[2], @acc[2], [$out_ptr,#8*8]\n\tstp\t@acc[2], @acc[2], [$out_ptr,#8*10]\n\n\tmul\t@acc[0], $f0, @acc[4]\t\t// |u|*|f1|\n\tsmulh\t@acc[1], $f0, @acc[4]\n\tmul\t@acc[2], $g0, @acc[5]\t\t// |v|*|g1|\n\tsmulh\t@acc[3], $g0, 
@acc[5]\n\tadds\t@acc[0], @acc[0], @acc[2]\n\tadc\t@acc[1], @acc[1], @acc[3]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*14]\n\tasr\t@acc[2], @acc[1], #63\t\t// sign extension\n\tstp\t@acc[2], @acc[2], [$out_ptr,#8*16]\n\tstp\t@acc[2], @acc[2], [$out_ptr,#8*18]\n___\nfor($i=2; $i<11; $i++) {\n$code.=<<___;\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tmov\t$cnt, #62\n\tbl\t__ab_approximation_62\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tbl\t__smul_384_n_shift_by_62\n\tmov\t$f_, $f0\t\t\t// corrected |f0|\n\tmov\t$g_, $g0\t\t\t// corrected |g0|\n\n\tmov\t$f0, $f1\t\t\t// |f1|\n\tmov\t$g0, $g1\t\t\t// |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to destination |b|\n\tbl\t__smul_384_n_shift_by_62\n\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to destination |u|\n\tbl\t__smul_384x63\n___\n$code.=<<___\tif ($i==5);\n\tasr\t@t[5], @t[5], #63\n\tstr\t@t[5], [$out_ptr,#8*6]\n___\n$code.=<<___\tif ($i>5);\n\tadc\t@t[3], @t[3], @t[4]\n\tstr\t@t[3], [$out_ptr,#8*6]\n___\n$code.=<<___;\n\tmov\t$f_, $f0\t\t\t// corrected |f1|\n\tmov\t$g_, $g0\t\t\t// corrected |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*8\t// pointer to destination |v|\n\tbl\t__smul_384x63\n___\n$code.=<<___\tif ($i>5);\n\tbl\t__smul_768x63_tail\n___\n$code.=<<___\tif ($i==5);\n\tasr\t@t[5], @t[5], #63\t\t// sign extension\n\tstp\t@t[5], @t[5], [$out_ptr,#8*6]\n\tstp\t@t[5], @t[5], [$out_ptr,#8*8]\n\tstp\t@t[5], @t[5], [$out_ptr,#8*10]\n___\n}\n$code.=<<___;\n\t////////////////////////////////////////// iteration before last\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tmov\t$cnt, #62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldp\t$a_lo, $a_hi, [$in_ptr,#8*0]\t// just load\n\tldp\t$b_lo, $b_hi, 
[$in_ptr,#8*6]\n\tbl\t__inner_loop_62\n\n\teor\t$out_ptr, $in_ptr, #256\t\t// pointer to dst |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, $in_ptr, $out_ptr\n#endif\n\tstr\t$a_lo, [$out_ptr,#8*0]\n\tstr\t$b_lo, [$out_ptr,#8*6]\n\n\tmov\t$f_, $f0\t\t\t// exact |f0|\n\tmov\t$g_, $g0\t\t\t// exact |g0|\n\tmov\t$f0, $f1\n\tmov\t$g0, $g1\n\tcadd\t$out_ptr, $out_ptr, #8*12\t// pointer to dst |u|\n\tbl\t__smul_384x63\n\tadc\t@t[3], @t[3], @t[4]\n\tstr\t@t[3], [$out_ptr,#8*6]\n\n\tmov\t$f_, $f0\t\t\t// exact |f1|\n\tmov\t$g_, $g0\t\t\t// exact |g1|\n\tcadd\t$out_ptr, $out_ptr, #8*8\t// pointer to dst |v|\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\n\t////////////////////////////////////////// last iteration\n\teor\t$in_ptr, $in_ptr, #256\t\t// flip-flop src |a|b|u|v|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, $out_ptr, $in_ptr\n#endif\n\tmov\t$cnt, #24\t\t\t// 768 % 62\n\t//bl\t__ab_approximation_62\t\t// |a| and |b| are exact,\n\tldr\t$a_lo, [$in_ptr,#8*0]\t\t// just load\n\teor\t$a_hi, $a_hi, $a_hi\n\tldr\t$b_lo, [$in_ptr,#8*6]\n\teor\t$b_hi, $b_hi, $b_hi\n\tbl\t__inner_loop_62\n\n\tmov\t$f_, $f1\n\tmov\t$g_, $g1\n\tldp\tc0, c15, [csp]\t\t\t// original out_ptr and n_ptr\n\tbl\t__smul_384x63\n\tbl\t__smul_768x63_tail\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tsmulh\t@t[1], @acc[5], $g_\t\t// figure out top-most limb\n\tadc\t@t[4], @t[4], @t[6]\n\tldp\t@acc[6], @acc[7], [$f0,#8*0]\t// load |mod|\n\tadd\t@t[1], @t[1], @t[4]\t\t// @t[1] is 1, 0 or -1\n\tldp\t@acc[8], @acc[9], [$f0,#8*2]\n\tasr\t@t[0], @t[1], #63\t\t// sign as mask\n\tldp\t@acc[10], @acc[11], [$f0,#8*4]\n\n\tand\t@t[4],   @acc[6], @t[0]\t\t// add mod<<384 conditionally\n\tand\t@t[5],   @acc[7], @t[0]\n\tadds\t@acc[0], @acc[0], @t[4]\n\tand\t@t[6],   @acc[8], @t[0]\n\tadcs\t@acc[1], @acc[1], @t[5]\n\tand\t@t[7],   @acc[9], @t[0]\n\tadcs\t@acc[2], @acc[2], @t[6]\n\tand\t@t[4],   @acc[10], @t[0]\n\tadcs\t@acc[3], @acc[3], @t[7]\n\tand\t@t[5],   @acc[11], 
@t[0]\n\tadcs\t@acc[4], @acc[4], @t[4]\n\tadcs\t@acc[5], @t[3],   @t[5]\n\tadc\t@t[1], @t[1], xzr\t\t// @t[1] is 1, 0 or -1\n\n\tneg\t@t[0], @t[1]\n\torr\t@t[1], @t[1], @t[0]\t\t// excess bit or sign as mask\n\tasr\t@t[0], @t[0], #63\t\t// excess bit as mask\n\n\tand\t@acc[6], @acc[6], @t[1]\t\t// mask |mod|\n\tand\t@acc[7], @acc[7], @t[1]\n\tand\t@acc[8], @acc[8], @t[1]\n\tand\t@acc[9], @acc[9], @t[1]\n\tand\t@acc[10], @acc[10], @t[1]\n\tand\t@acc[11], @acc[11], @t[1]\n\n\teor\t@acc[6],  @acc[6], @t[0]\t// conditionally negate |mod|\n\teor\t@acc[7],  @acc[7], @t[0]\n\tadds\t@acc[6],  @acc[6], @t[0], lsr#63\n\teor\t@acc[8],  @acc[8], @t[0]\n\tadcs\t@acc[7],  @acc[7], xzr\n\teor\t@acc[9],  @acc[9], @t[0]\n\tadcs\t@acc[8],  @acc[8], xzr\n\teor\t@acc[10], @acc[10], @t[0]\n\tadcs\t@acc[9],  @acc[9], xzr\n\teor\t@acc[11], @acc[11], @t[0]\n\tadcs\t@acc[10], @acc[10], xzr\n\tadc\t@acc[11], @acc[11], xzr\n\n\tadds\t@acc[0], @acc[0], @acc[6]\t// final adjustment for |mod|<<384\n\tadcs\t@acc[1], @acc[1], @acc[7]\n\tadcs\t@acc[2], @acc[2], @acc[8]\n\tadcs\t@acc[3], @acc[3], @acc[9]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*6]\n\tadcs\t@acc[4], @acc[4], @acc[10]\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*8]\n\tadc\t@acc[5], @acc[5], @acc[11]\n\tstp\t@acc[4], @acc[5], [$out_ptr,#8*10]\n\n\tadd\tcsp, csp, #$frame\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27, c28, [c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29, [csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tct_inverse_mod_384,.-ct_inverse_mod_384\n\n////////////////////////////////////////////////////////////////////////\n// see corresponding commentary in ctx_inverse_mod_384-x86_64...\n.type\t__smul_384x63, %function\n.align\t5\n__smul_384x63:\n___\nfor($j=0; $j<2; $j++) {\nmy $f_ = $f_;   $f_ = $g_          if ($j);\nmy @acc = @acc; @acc = @acc[6..11] if ($j);\nmy $k = 
8*12+8*8*$j;\n$code.=<<___;\n\tldp\t@acc[0], @acc[1], [$in_ptr,#8*0+$k]\t// load |u| (or |v|)\n\tasr\t$f1, $f_, #63\t\t// |f_|'s sign as mask (or |g_|'s)\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2+$k]\n\teor\t$f_, $f_, $f1\t\t// conditionally negate |f_| (or |g_|)\n\tldp\t@acc[4], @acc[5], [$in_ptr,#8*4+$k]\n\n\teor\t@acc[0], @acc[0], $f1\t// conditionally negate |u| (or |v|)\n\tldr\t@t[3+$j], [$in_ptr,#8*6+$k]\n\tsub\t$f_, $f_, $f1\n\teor\t@acc[1], @acc[1], $f1\n\tadds\t@acc[0], @acc[0], $f1, lsr#63\n\teor\t@acc[2], @acc[2], $f1\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], $f1\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[4], @acc[4], $f1\n\tadcs\t@acc[3], @acc[3], xzr\n\t umulh\t@t[0], @acc[0], $f_\n\teor\t@acc[5], @acc[5], $f1\n\t umulh\t@t[1], @acc[1], $f_\n\tadcs\t@acc[4], @acc[4], xzr\n\t umulh\t@t[2], @acc[2], $f_\n\teor\t@t[3+$j], @t[3+$j], $f1\n\t mul\t@acc[0], @acc[0], $f_\n\tadcs\t@acc[5], @acc[5], xzr\n\t mul\t@acc[1], @acc[1], $f_\n\tadcs\t@t[3+$j], @t[3+$j], xzr\n___\n$code.=<<___\tif ($j);\n\tadc\t$g1, xzr, xzr\t\t// used in __smul_768x63_tail\n___\n$code.=<<___;\n\t cmp\t$f_, #0\n\tmul\t@acc[2], @acc[2], $f_\n\t csel\t@t[3+$j], @t[3+$j], xzr, ne\n\tadds\t@acc[1], @acc[1], @t[0]\n\tumulh\t@t[0], @acc[3], $f_\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tumulh\t@t[1], @acc[4], $f_\n\tmul\t@acc[3], @acc[3], $f_\n\tmul\t@acc[4], @acc[4], $f_\n\tadcs\t@acc[3], @acc[3], @t[2]\n\tmul\t@t[5+$j],@acc[5], $f_\n\tadcs\t@acc[4], @acc[4], @t[0]\n\tadcs\t@t[5+$j],@t[5+$j],@t[1]\n___\n$code.=<<___\tif ($j==0);\n\tadc\t@t[7], xzr, xzr\n___\n}\n$code.=<<___;\n\tadc\t@t[7], @t[7], xzr\n\n\tadds\t@acc[0], @acc[0], @acc[6]\n\tadcs\t@acc[1], @acc[1], @acc[7]\n\tadcs\t@acc[2], @acc[2], @acc[8]\n\tadcs\t@acc[3], @acc[3], @acc[9]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*0]\n\tadcs\t@acc[4], @acc[4], @acc[10]\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*2]\n\tadcs\t@t[5],   @t[5],   @t[6]\n\tstp\t@acc[4], @t[5],   
[$out_ptr,#8*4]\n\n\tret\n.size\t__smul_384x63,.-__smul_384x63\n\n.type\t__smul_768x63_tail, %function\n.align\t5\n__smul_768x63_tail:\n\tumulh\t@t[5], @acc[5], $f_\n\tldr\t@acc[1], [$in_ptr,#8*27]// load rest of |v|\n\tadc\t@t[7], @t[7], xzr\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*28]\n\tand\t@t[3], @t[3], $f_\n\tldp\t@acc[4], @acc[5], [$in_ptr,#8*30]\n\tsub\t@t[5], @t[5], @t[3]\t// tie up |u|*|f1| chain\n\n\tumulh\t@acc[11], @acc[11], $g_\t// resume |v|*|g1| chain\n\teor\t@acc[1], @acc[1], $f1\t// conditionally negate rest of |v|\n\teor\t@acc[2], @acc[2], $f1\n\teor\t@acc[3], @acc[3], $f1\n\tadds\t@acc[1], @acc[1], $g1\n\teor\t@acc[4], @acc[4], $f1\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[5], @acc[5], $f1\n\tadcs\t@acc[3], @acc[3], xzr\n\t umulh\t@t[0], @t[4],   $g_\n\tadcs\t@acc[4], @acc[4], xzr\n\t umulh\t@t[1], @acc[1], $g_\n\tadc\t@acc[5], @acc[5], xzr\n\n\tumulh\t@t[2], @acc[2], $g_\n\t add\t@acc[11], @acc[11], @t[7]\n\tumulh\t@t[3], @acc[3], $g_\n\t asr\t@t[6], @t[5], #63\n\tumulh\t@t[7], @acc[4], $g_\n\tmul\t@acc[0], @t[4],   $g_\n\tmul\t@acc[1], @acc[1], $g_\n\tmul\t@acc[2], @acc[2], $g_\n\tadds\t@acc[0], @acc[0], @acc[11]\n\tmul\t@acc[3], @acc[3], $g_\n\tadcs\t@acc[1], @acc[1], @t[0]\n\tmul\t@acc[4], @acc[4], $g_\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tmul\t@t[0],   @acc[5], $g_\n\tadcs\t@acc[3], @acc[3], @t[2]\n\tadcs\t@acc[4], @acc[4], @t[3]\n\tadcs\t@t[3],   @t[0], @t[7]\n\tadc\t@t[4], xzr, xzr\t\t// used in the final step\n\n\tadds\t@acc[0], @acc[0], @t[5]\n\tadcs\t@acc[1], @acc[1], @t[6]\n\tadcs\t@acc[2], @acc[2], @t[6]\n\tadcs\t@acc[3], @acc[3], @t[6]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*6]\n\tadcs\t@acc[4], @acc[4], @t[6]\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*8]\n\tadcs\t@t[3],   @t[3],   @t[6]\t// carry is used in the final step\n\tstp\t@acc[4], @t[3],   [$out_ptr,#8*10]\n\n\tret\n.size\t__smul_768x63_tail,.-__smul_768x63_tail\n\n.type\t__smul_384_n_shift_by_62, %function\n.align\t5\n__smul_384_n_shift_by_62:\n___\nfor($j=0; $j<2; $j++) {\nmy $f0 
= $f0;   $f0 = $g0           if ($j);\nmy @acc = @acc; @acc = @acc[6..11]  if ($j);\nmy $k = 8*6*$j;\n$code.=<<___;\n\tldp\t@acc[0], @acc[1], [$in_ptr,#8*0+$k]\t// load |a| (or |b|)\n\tasr\t@t[6], $f0, #63\t\t// |f0|'s sign as mask (or |g0|'s)\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2+$k]\n\teor\t@t[7], $f0, @t[6]\t// conditionally negate |f0| (or |g0|)\n\tldp\t@acc[4], @acc[5], [$in_ptr,#8*4+$k]\n\n\teor\t@acc[0], @acc[0], @t[6]\t// conditionally negate |a| (or |b|)\n\tsub\t@t[7], @t[7], @t[6]\n\teor\t@acc[1], @acc[1], @t[6]\n\tadds\t@acc[0], @acc[0], @t[6], lsr#63\n\teor\t@acc[2], @acc[2], @t[6]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[6]\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[4], @acc[4], @t[6]\n\t umulh\t@t[0], @acc[0], @t[7]\n\tadcs\t@acc[3], @acc[3], xzr\n\t umulh\t@t[1], @acc[1], @t[7]\n\teor\t@acc[5], @acc[5], @t[6]\n\t mul\t@acc[0], @acc[0], @t[7]\n\tadcs\t@acc[4], @acc[4], xzr\n\t mul\t@acc[1], @acc[1], @t[7]\n\tadc\t@acc[5], @acc[5], xzr\n\n\tumulh\t@t[2], @acc[2], @t[7]\n\t and\t@t[6], @t[6], @t[7]\n\tumulh\t@t[3], @acc[3], @t[7]\n\tadds\t@acc[1], @acc[1], @t[0]\n\tmul\t@acc[2], @acc[2], @t[7]\n\tumulh\t@t[0], @acc[4], @t[7]\n\t neg\t@t[6], @t[6]\n\tmul\t@acc[3], @acc[3], @t[7]\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tumulh\t@t[1], @acc[5], @t[7]\n\tmul\t@acc[4], @acc[4], @t[7]\n\tadcs\t@acc[3], @acc[3], @t[2]\n\tmul\t@acc[5], @acc[5], @t[7]\n\tadcs\t@acc[4], @acc[4], @t[3]\n\tadcs\t@acc[5], @acc[5], @t[0]\n\tadc\t@t[5+$j], @t[1], @t[6]\n___\n}\n$code.=<<___;\n\tadds\t@acc[0], @acc[0], @acc[6]\n\tadcs\t@acc[1], @acc[1], @acc[7]\n\tadcs\t@acc[2], @acc[2], @acc[8]\n\tadcs\t@acc[3], @acc[3], @acc[9]\n\tadcs\t@acc[4], @acc[4], @acc[10]\n\tadcs\t@acc[5], @acc[5], @acc[11]\n\tadc\t@acc[6], @t[5],   @t[6]\n\n\textr\t@acc[0], @acc[1], @acc[0], #62\n\textr\t@acc[1], @acc[2], @acc[1], #62\n\textr\t@acc[2], @acc[3], @acc[2], #62\n\tasr\t@t[6], @acc[6], #63\n\textr\t@acc[3], @acc[4], @acc[3], #62\n\textr\t@acc[4], @acc[5], @acc[4], 
#62\n\textr\t@acc[5], @acc[6], @acc[5], #62\n\n\teor\t@acc[0], @acc[0], @t[6]\n\teor\t@acc[1], @acc[1], @t[6]\n\tadds\t@acc[0], @acc[0], @t[6], lsr#63\n\teor\t@acc[2], @acc[2], @t[6]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[6]\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[4], @acc[4], @t[6]\n\tadcs\t@acc[3], @acc[3], xzr\n\teor\t@acc[5], @acc[5], @t[6]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*0]\n\tadcs\t@acc[4], @acc[4], xzr\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*2]\n\tadc\t@acc[5], @acc[5], xzr\n\tstp\t@acc[4], @acc[5], [$out_ptr,#8*4]\n\n\teor\t$f0, $f0, @t[6]\n\teor\t$g0, $g0, @t[6]\n\tsub\t$f0, $f0, @t[6]\n\tsub\t$g0, $g0, @t[6]\n\n\tret\n.size\t__smul_384_n_shift_by_62,.-__smul_384_n_shift_by_62\n___\n\n{\nmy @a = @acc[0..5];\nmy @b = @acc[6..11];\n\n$code.=<<___;\n.type\t__ab_approximation_62, %function\n.align\t4\n__ab_approximation_62:\n\tldp\t@a[4], @a[5], [$in_ptr,#8*4]\n\tldp\t@b[4], @b[5], [$in_ptr,#8*10]\n\tldp\t@a[2], @a[3], [$in_ptr,#8*2]\n\tldp\t@b[2], @b[3], [$in_ptr,#8*8]\n\n.Lab_approximation_62_loaded:\n\torr\t@t[0], @a[5], @b[5]\t// check top-most limbs, ...\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[3], ne\n\torr\t@t[0], @a[5], @b[5]\t// ... ones before top-most, ...\n\tcsel\t@b[4], @b[4], @b[3], ne\n\n\tldp\t@a[0], @a[1], [$in_ptr,#8*0]\n\tldp\t@b[0], @b[1], [$in_ptr,#8*6]\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[2], ne\n\torr\t@t[0], @a[5], @b[5]\t// ... 
and ones before that ...\n\tcsel\t@b[4], @b[4], @b[2], ne\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[1], ne\n\torr\t@t[0], @a[5], @b[5]\n\tcsel\t@b[4], @b[4], @b[1], ne\n\n\tclz\t@t[0], @t[0]\n\tcmp\t@t[0], #64\n\tcsel\t@t[0], @t[0], xzr, ne\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tneg\t@t[1], @t[0]\n\n\tlslv\t@a[5], @a[5], @t[0]\t// align high limbs to the left\n\tlslv\t@b[5], @b[5], @t[0]\n\tlsrv\t@a[4], @a[4], @t[1]\n\tlsrv\t@b[4], @b[4], @t[1]\n\tand\t@a[4], @a[4], @t[1], asr#6\n\tand\t@b[4], @b[4], @t[1], asr#6\n\torr\t@a[5], @a[5], @a[4]\n\torr\t@b[5], @b[5], @b[4]\n\n\tb\t__inner_loop_62\n\tret\n.size\t__ab_approximation_62,.-__ab_approximation_62\n___\n}\n$code.=<<___;\n.type\t__inner_loop_62, %function\n.align\t4\n__inner_loop_62:\n\tmov\t$f0, #1\t\t// |f0|=1\n\tmov\t$g0, #0\t\t// |g0|=0\n\tmov\t$f1, #0\t\t// |f1|=0\n\tmov\t$g1, #1\t\t// |g1|=1\n\n.Loop_62:\n\tsbfx\t@t[6], $a_lo, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\tsub\t$cnt, $cnt, #1\n\tsubs\t@t[2], $b_lo, $a_lo\t// |b_|-|a_|\n\tand\t@t[0], $b_lo, @t[6]\n\tsbc\t@t[3], $b_hi, $a_hi\n\tand\t@t[1], $b_hi, @t[6]\n\tsubs\t@t[4], $a_lo, @t[0]\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tmov\t@t[0], $f0\n\tsbcs\t@t[5], $a_hi, @t[1]\n\tmov\t@t[1], $g0\n\tcsel\t$b_lo, $b_lo, $a_lo, hs\t// |b_| = |a_|\n\tcsel\t$b_hi, $b_hi, $a_hi, hs\n\tcsel\t$a_lo, @t[4], @t[2], hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\t$a_hi, @t[5], @t[3], hs\n\tcsel\t$f0, $f0, $f1,       hs\t// exchange |f0| and |f1|\n\tcsel\t$f1, $f1, @t[0],     hs\n\tcsel\t$g0, $g0, $g1,       hs\t// exchange |g0| and |g1|\n\tcsel\t$g1, $g1, @t[1],     hs\n\textr\t$a_lo, $a_hi, $a_lo, #1\n\tlsr\t$a_hi, $a_hi, #1\n\tand\t@t[0], $f1, @t[6]\n\tand\t@t[1], $g1, @t[6]\n\tadd\t$f1, $f1, $f1\t\t// |f1|<<=1\n\tadd\t$g1, $g1, $g1\t\t// |g1|<<=1\n\tsub\t$f0, $f0, @t[0]\t\t// |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\t$g0, $g0, 
@t[1]\t\t// |g0|-=|g1| (or |g0-=0| ...)\n\tcbnz\t$cnt, .Loop_62\n\n\tret\n.size\t__inner_loop_62,.-__inner_loop_62\n___\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ct_is_square_mod_384-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast quadratic residue test as suggested in\n# https://eprint.iacr.org/2020/972. Performance is >12x better [on\n# Cortex cores] than modulus-specific Legendre symbol addition chain...\n#\n# bool ct_is_square_mod_384(const vec384 inp, const vec384 mod);\n#\n$python_ref.=<<'___';\ndef ct_is_square_mod_384(inp, mod):\n    a = inp\n    b = mod\n    L = 0   # only least significant bit, adding 1 makes up for sign change\n\n    k = 30\n    w = 32\n    mask = (1 << w) - 1\n\n    for i in range(0, 768 // k - 1):\n        # __ab_approximation_30\n        n = max(a.bit_length(), b.bit_length())\n        if n < 64:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-w)) << w)\n            b_ = (b & mask) | ((b >> (n-w)) << w)\n\n        # __inner_loop_30\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                    L += (a_ & b_) >> 1 # |a| and |b| are both odd, second bits\n                                        # tell the whole story\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n            L += (b_ + 2) >> 2          # if |b|%8 is 3 or 5 [out of 1,3,5,7]\n\n        # __smulq_384_n_shift_by_30\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if b < 0:\n            b = -b\n        if a < 0:\n            a = -a\n            L += (b % 4) >> 1           # |b| is always odd, the second bit\n                                        # tells the whole story\n\n    if True:\n        for j in range(0, 768 % k + k):\n            if a & 1:\n                if a < b:\n                    a, b = b, a\n                    L += (a 
& b) >> 1   # |a| and |b| are both odd, second bits\n                                        # tell the whole story\n                a = a-b\n            a = a >> 1\n            L += (b + 2) >> 2           # if |b|%8 is 3 or 5 [out of 1,3,5,7]\n\n    return (L & 1) ^ 1\n___\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\nmy ($in_ptr, $out_ptr, $L) = map(\"x$_\", (0..2));\nmy @acc=map(\"x$_\",(3..14));\nmy ($cnt, $f0, $g0, $f1, $g1) = map(\"x$_\",(15..17,19..20));\nmy @t = map(\"x$_\",(21..28));\nmy ($a_, $b_) = @acc[5,11];\n\n$frame = 2*256;\n\n$code.=<<___;\n.text\n\n.globl\tct_is_square_mod_384\n.hidden\tct_is_square_mod_384\n.type\tct_is_square_mod_384, %function\n.align\t5\nct_is_square_mod_384:\n\tpaciasp\n\tstp\tc29, c30, [csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29, csp, #0\n\tstp\tc19, c20, [csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21, c22, [csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23, c24, [csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25, c26, [csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27, c28, [csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp, csp, #$frame\n\n\tldp\t@acc[0], @acc[1], [x0,#8*0]\t\t// load input\n\tldp\t@acc[2], @acc[3], [x0,#8*2]\n\tldp\t@acc[4], @acc[5], [x0,#8*4]\n\n\tadd\t$in_ptr, sp, #255\t// find closest 256-byte-aligned spot\n\tand\t$in_ptr, $in_ptr, #-256\t// in the frame...\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, csp, $in_ptr\n#endif\n\n\tldp\t@acc[6], @acc[7], [x1,#8*0]\t\t// load modulus\n\tldp\t@acc[8], @acc[9], [x1,#8*2]\n\tldp\t@acc[10], @acc[11], [x1,#8*4]\n\n\tstp\t@acc[0], @acc[1], [$in_ptr,#8*6]\t// copy input to |a|\n\tstp\t@acc[2], @acc[3], [$in_ptr,#8*8]\n\tstp\t@acc[4], @acc[5], 
[$in_ptr,#8*10]\n\tstp\t@acc[6], @acc[7], [$in_ptr,#8*0]\t// copy modulus to |b|\n\tstp\t@acc[8], @acc[9], [$in_ptr,#8*2]\n\tstp\t@acc[10], @acc[11], [$in_ptr,#8*4]\n\n\teor\t$L, $L, $L\t\t\t// init the Legendre symbol\n\tmov\t$cnt, #24\t\t\t// 24 is 768/30-1\n\tb\t.Loop_is_square\n\n.align\t4\n.Loop_is_square:\n\tbl\t__ab_approximation_30\n\tsub\t$cnt, $cnt, #1\n\n\teor\t$out_ptr, $in_ptr, #128\t\t// pointer to dst |b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $out_ptr, csp, $out_ptr\n#endif\n\tbl\t__smul_384_n_shift_by_30\n\n\tmov\t$f1, $f0\t\t\t// |f0|\n\tmov\t$g1, $g0\t\t\t// |g0|\n\tcadd\t$out_ptr, $out_ptr, #8*6\t// pointer to dst |a|\n\tbl\t__smul_384_n_shift_by_30\n\n\tldp\t@acc[6], @acc[7], [$out_ptr,#-8*6]\n\teor\t$in_ptr, $in_ptr, #128\t\t// flip-flop src |a|b|\n#ifdef\t__CHERI_PURE_CAPABILITY__\n\tscvalue $in_ptr, csp, $in_ptr\n#endif\n\tand\t@t[6], @t[6], @acc[6]\t\t// if |a| was negative,\n\tadd\t$L, $L, @t[6], lsr#1\t\t// adjust |L|\n\n\tcbnz\t$cnt, .Loop_is_square\n\n\t////////////////////////////////////////// last iteration\n\t//bl\t__ab_approximation_30\t\t// |a| and |b| are exact,\n\t//ldr\t$a_, [$in_ptr,#8*6]\t\t// and loaded\n\t//ldr\t$b_, [$in_ptr,#8*0]\n\tmov\t$cnt, #48\t\t\t// 48 is 768%30 + 30\n\tbl\t__inner_loop_48\n\tldr\tc30, [c29,#__SIZEOF_POINTER__]\n\n\tand\tx0, $L, #1\n\teor\tx0, x0, #1\n\n\tadd\tcsp, csp, #$frame\n\tldp\tc19, c20, [c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21, c22, [c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23, c24, [c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25, c26, [c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27, c28, [c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29, [csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tct_is_square_mod_384,.-ct_is_square_mod_384\n\n.type\t__smul_384_n_shift_by_30, %function\n.align\t5\n__smul_384_n_shift_by_30:\n___\nfor($j=0; $j<2; $j++) {\nmy $fx = $g1;   $fx = $f1           if ($j);\nmy @acc = @acc; @acc = @acc[6..11]  if ($j);\nmy $k = 8*6*$j;\n$code.=<<___;\n\tldp\t@acc[0], @acc[1], 
[$in_ptr,#8*0+$k]\t// load |b| (or |a|)\n\tasr\t@t[6], $fx, #63\t\t// |g1|'s sign as mask (or |f1|'s)\n\tldp\t@acc[2], @acc[3], [$in_ptr,#8*2+$k]\n\teor\t$fx, $fx, @t[6]\t\t// conditionally negate |g1| (or |f1|)\n\tldp\t@acc[4], @acc[5], [$in_ptr,#8*4+$k]\n\n\teor\t@acc[0], @acc[0], @t[6]\t// conditionally negate |b| (or |a|)\n\tsub\t$fx, $fx, @t[6]\n\teor\t@acc[1], @acc[1], @t[6]\n\tadds\t@acc[0], @acc[0], @t[6], lsr#63\n\teor\t@acc[2], @acc[2], @t[6]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[6]\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[4], @acc[4], @t[6]\n\t umulh\t@t[0], @acc[0], $fx\n\tadcs\t@acc[3], @acc[3], xzr\n\t umulh\t@t[1], @acc[1], $fx\n\teor\t@acc[5], @acc[5], @t[6]\n\t umulh\t@t[2], @acc[2], $fx\n\tadcs\t@acc[4], @acc[4], xzr\n\t umulh\t@t[3], @acc[3], $fx\n\tadc\t@acc[5], @acc[5], xzr\n\n\tumulh\t@t[4], @acc[4], $fx\n\tand\t@t[7], $fx, @t[6]\n\tumulh\t@t[5+$j], @acc[5], $fx\n\tneg\t@t[7], @t[7]\n\tmul\t@acc[0], @acc[0], $fx\n\tmul\t@acc[1], @acc[1], $fx\n\tmul\t@acc[2], @acc[2], $fx\n\tadds\t@acc[1], @acc[1], @t[0]\n\tmul\t@acc[3], @acc[3], $fx\n\tadcs\t@acc[2], @acc[2], @t[1]\n\tmul\t@acc[4], @acc[4], $fx\n\tadcs\t@acc[3], @acc[3], @t[2]\n\tmul\t@acc[5], @acc[5], $fx\n\tadcs\t@acc[4], @acc[4], @t[3]\n\tadcs\t@acc[5], @acc[5] ,@t[4]\n\tadc\t@t[5+$j], @t[5+$j], @t[7]\n___\n}\n$code.=<<___;\n\tadds\t@acc[0], @acc[0], @acc[6]\n\tadcs\t@acc[1], @acc[1], @acc[7]\n\tadcs\t@acc[2], @acc[2], @acc[8]\n\tadcs\t@acc[3], @acc[3], @acc[9]\n\tadcs\t@acc[4], @acc[4], @acc[10]\n\tadcs\t@acc[5], @acc[5], @acc[11]\n\tadc\t@acc[6], @t[5],   @t[6]\n\n\textr\t@acc[0], @acc[1], @acc[0], #30\n\textr\t@acc[1], @acc[2], @acc[1], #30\n\textr\t@acc[2], @acc[3], @acc[2], #30\n\tasr\t@t[6], @acc[6], #63\n\textr\t@acc[3], @acc[4], @acc[3], #30\n\textr\t@acc[4], @acc[5], @acc[4], #30\n\textr\t@acc[5], @acc[6], @acc[5], #30\n\n\teor\t@acc[0], @acc[0], @t[6]\n\teor\t@acc[1], @acc[1], @t[6]\n\tadds\t@acc[0], @acc[0], @t[6], lsr#63\n\teor\t@acc[2], @acc[2], 
@t[6]\n\tadcs\t@acc[1], @acc[1], xzr\n\teor\t@acc[3], @acc[3], @t[6]\n\tadcs\t@acc[2], @acc[2], xzr\n\teor\t@acc[4], @acc[4], @t[6]\n\tadcs\t@acc[3], @acc[3], xzr\n\teor\t@acc[5], @acc[5], @t[6]\n\tstp\t@acc[0], @acc[1], [$out_ptr,#8*0]\n\tadcs\t@acc[4], @acc[4], xzr\n\tstp\t@acc[2], @acc[3], [$out_ptr,#8*2]\n\tadc\t@acc[5], @acc[5], xzr\n\tstp\t@acc[4], @acc[5], [$out_ptr,#8*4]\n\n\tret\n.size\t__smul_384_n_shift_by_30,.-__smul_384_n_shift_by_30\n___\n\n{\nmy @a = @acc[0..5];\nmy @b = @acc[6..11];\nmy ($fg0, $fg1, $bias, $cnt) = ($g0, $g1, @t[6], @t[7]);\n\n$code.=<<___;\n.type\t__ab_approximation_30, %function\n.align\t4\n__ab_approximation_30:\n\tldp\t@b[4], @b[5], [$in_ptr,#8*4]\t// |a| is still in registers\n\tldp\t@b[2], @b[3], [$in_ptr,#8*2]\n\n\torr\t@t[0], @a[5], @b[5]\t// check top-most limbs, ...\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[3], ne\n\torr\t@t[0], @a[5], @b[5]\t// ... ones before top-most, ...\n\tcsel\t@b[4], @b[4], @b[3], ne\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[2], ne\n\torr\t@t[0], @a[5], @b[5]\t// ... 
and ones before that ...\n\tcsel\t@b[4], @b[4], @b[2], ne\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[1], ne\n\torr\t@t[0], @a[5], @b[5]\t// and one more, ...\n\tcsel\t@b[4], @b[4], @b[1], ne\n\n\tcmp\t@t[0], #0\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tcsel\t@a[4], @a[4], @a[0], ne\n\torr\t@t[0], @a[5], @b[5]\n\tcsel\t@b[4], @b[4], @b[0], ne\n\n\tclz\t@t[0], @t[0]\n\tcmp\t@t[0], #64\n\tcsel\t@t[0], @t[0], xzr, ne\n\tcsel\t@a[5], @a[5], @a[4], ne\n\tcsel\t@b[5], @b[5], @b[4], ne\n\tneg\t@t[1], @t[0]\n\n\tlslv\t@a[5], @a[5], @t[0]\t// align high limbs to the left\n\tlslv\t@b[5], @b[5], @t[0]\n\tlsrv\t@a[4], @a[4], @t[1]\n\tlsrv\t@b[4], @b[4], @t[1]\n\tand\t@a[4], @a[4], @t[1], asr#6\n\tand\t@b[4], @b[4], @t[1], asr#6\n\torr\t$a_, @a[5], @a[4]\n\torr\t$b_, @b[5], @b[4]\n\n\tbfxil\t$a_, @a[0], #0, #32\n\tbfxil\t$b_, @b[0], #0, #32\n\n\tb\t__inner_loop_30\n\tret\n.size\t__ab_approximation_30,.-__ab_approximation_30\n\n.type\t__inner_loop_30, %function\n.align\t4\n__inner_loop_30:\n\tmov\t$cnt, #30\n\tmov\t$fg0, #0x7FFFFFFF80000000\t// |f0|=1, |g0|=0\n\tmov\t$fg1, #0x800000007FFFFFFF\t// |f1|=0, |g1|=1\n\tmov\t$bias,#0x7FFFFFFF7FFFFFFF\n\n.Loop_30:\n\tsbfx\t@t[3], $a_, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\t and\t@t[4], $a_, $b_\n\tsub\t$cnt, $cnt, #1\n\tand\t@t[0], $b_, @t[3]\n\n\tsub\t@t[1], $b_, $a_\t\t// |b_|-|a_|\n\tsubs\t@t[2], $a_, @t[0]\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\t add\t@t[4], $L, @t[4], lsr#1\t// L + (a_ & b_) >> 1\n\tmov\t@t[0], $fg1\n\tcsel\t$b_, $b_, $a_, hs\t// |b_| = |a_|\n\tcsel\t$a_, @t[2], @t[1], hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcsel\t$fg1, $fg1, $fg0,  hs\t// exchange |fg0| and |fg1|\n\tcsel\t$fg0, $fg0, @t[0], hs\n\t csel\t$L,   $L,   @t[4], hs\n\tlsr\t$a_, $a_, #1\n\tand\t@t[0], $fg1, @t[3]\n\tand\t@t[1], $bias, @t[3]\n\t add\t$t[2], $b_, #2\n\tsub\t$fg0, $fg0, @t[0]\t// |f0|-=|f1| (or |f0-=0| if 
|a_| was even)\n\tadd\t$fg1, $fg1, $fg1\t// |f1|<<=1\n\t add\t$L, $L, $t[2], lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\tadd\t$fg0, $fg0, @t[1]\n\tsub\t$fg1, $fg1, $bias\n\n\tcbnz\t$cnt, .Loop_30\n\n\tmov\t$bias, #0x7FFFFFFF\n\tubfx\t$f0, $fg0, #0, #32\n\tubfx\t$g0, $fg0, #32, #32\n\tubfx\t$f1, $fg1, #0, #32\n\tubfx\t$g1, $fg1, #32, #32\n\tsub\t$f0, $f0, $bias\t\t// remove the bias\n\tsub\t$g0, $g0, $bias\n\tsub\t$f1, $f1, $bias\n\tsub\t$g1, $g1, $bias\n\n\tret\n.size\t__inner_loop_30,.-__inner_loop_30\n___\n}\n\n{\nmy ($a_, $b_) = (@acc[0], @acc[6]);\n$code.=<<___;\n.type\t__inner_loop_48, %function\n.align\t4\n__inner_loop_48:\n.Loop_48:\n\tsbfx\t@t[3], $a_, #0, #1\t// if |a_| is odd, then we'll be subtracting\n\t and\t@t[4], $a_, $b_\n\tsub\t$cnt, $cnt, #1\n\tand\t@t[0], $b_, @t[3]\n\tsub\t@t[1], $b_, $a_\t\t// |b_|-|a_|\n\tsubs\t@t[2], $a_, @t[0]\t// |a_|-|b_| (or |a_|-0 if |a_| was even)\n\t add\t@t[4], $L, @t[4], lsr#1\n\tcsel\t$b_, $b_, $a_, hs\t// |b_| = |a_|\n\tcsel\t$a_, @t[2], @t[1], hs\t// borrow means |a_|<|b_|, replace with |b_|-|a_|\n\t csel\t$L,   $L,   @t[4], hs\n\t add\t$t[2], $b_, #2\n\tlsr\t$a_, $a_, #1\n\t add\t$L, $L, $t[2], lsr#2\t// \"negate\" |L| if |b|%8 is 3 or 5\n\n\tcbnz\t$cnt, .Loop_48\n\n\tret\n.size\t__inner_loop_48,.-__inner_loop_48\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ct_is_square_mod_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast quadratic residue test as suggested in\n# https://eprint.iacr.org/2020/972. Performance is >5x better than\n# modulus-specific Legendre symbol addition chain...\n#\n# bool ct_is_square_mod_384(const vec384 inp, const vec384 mod);\n#\n$python_ref.=<<'___';\ndef ct_is_square_mod_384(inp, mod):\n    a = inp\n    b = mod\n    L = 0   # only least significant bit, adding 1 makes up for sign change\n\n    k = 30\n    w = 32\n    mask = (1 << w) - 1\n\n    for i in range(0, 768 // k - 1):\n        # __ab_approximation_30\n        n = max(a.bit_length(), b.bit_length())\n        if n < 64:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-w)) << w)\n            b_ = (b & mask) | ((b >> (n-w)) << w)\n\n        # __inner_loop_30\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                    L += (a_ & b_) >> 1 # |a| and |b| are both odd, second bits\n                                        # tell the whole story\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n            L += (b_ + 2) >> 2          # if |b|%8 is 3 or 5 [out of 1,3,5,7]\n\n        # __smulq_384_n_shift_by_30\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if b < 0:\n            b = -b\n        if a < 0:\n            a = -a\n            L += (b % 4) >> 1           # |b| is always odd, the second bit\n                                        # tells the whole story\n\n    if True:\n        for j in range(0, 768 % k + k):\n            if a & 1:\n                if a < b:\n                    a, b = b, a\n                    L += (a & b) >> 1   # |a| 
and |b| are both odd, second bits\n                                        # tell the whole story\n                a = a-b\n            a = a >> 1\n            L += (b + 2) >> 2           # if |b|%8 is 3 or 5 [out of 1,3,5,7]\n\n    return (L & 1) ^ 1\n___\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\nmy ($out_ptr, $in_ptr) = (\"%rdi\", \"%rsi\");\nmy ($f0, $g0, $f1, $g1) = (\"%rax\", \"%rbx\", \"%rdx\",\"%rcx\");\nmy @acc=map(\"%r$_\",(8..15));\nmy $L = \"%rbp\";\n\n$frame = 8*3+2*256;\n\n$code.=<<___;\n.text\n\n.globl\tct_is_square_mod_384\n.hidden\tct_is_square_mod_384\n.type\tct_is_square_mod_384,\\@function,2,\"unwind\"\n.align\t32\nct_is_square_mod_384:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t8*3+255(%rsp), %rax\t# find closest 256-byte-aligned spot\n\tand\t\\$-256, %rax\t\t# in the frame...\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0(%rdi), @acc[0]\t# load input\n\tmov\t8*1(%rdi), @acc[1]\n\tmov\t8*2(%rdi), @acc[2]\n\tmov\t8*3(%rdi), @acc[3]\n\tmov\t8*4(%rdi), @acc[4]\n\tmov\t8*5(%rdi), @acc[5]\n\n\tmov\t8*0(%rsi), @acc[6]\t# load modulus\n\tmov\t8*1(%rsi), @acc[7]\n\tmov\t8*2(%rsi), %rbx\n\tmov\t8*3(%rsi), %rcx\n\tmov\t8*4(%rsi), %rdx\n\tmov\t8*5(%rsi), %rdi\n\tmov\t%rax, $in_ptr\t\t# pointer to source |a|b|\n\n\tmov\t@acc[0], 8*0(%rax)\t# 
copy input to |a|\n\tmov\t@acc[1], 8*1(%rax)\n\tmov\t@acc[2], 8*2(%rax)\n\tmov\t@acc[3], 8*3(%rax)\n\tmov\t@acc[4], 8*4(%rax)\n\tmov\t@acc[5], 8*5(%rax)\n\n\tmov\t@acc[6], 8*6(%rax)\t# copy modulus to |b|\n\tmov\t@acc[7], 8*7(%rax)\n\tmov\t%rbx,    8*8(%rax)\n\tmov\t%rcx,    8*9(%rax)\n\tmov\t%rdx,    8*10(%rax)\n\tmov\t%rdi,    8*11(%rax)\n\n\txor\t$L, $L\t\t\t# initialize the Legendre symbol\n\tmov\t\\$24, %ecx\t\t# 24 is 768/30-1\n\tjmp\t.Loop_is_square\n\n.align\t32\n.Loop_is_square:\n\tmov\t%ecx, 8*2(%rsp)\t\t# offload loop counter\n\n\tcall\t__ab_approximation_30\n\tmov\t$f0, 8*0(%rsp)\t\t# offload |f0| and |g0|\n\tmov\t$g0, 8*1(%rsp)\n\n\tmov\t\\$128+8*6, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmov\t8*0(%rsp), $f1\t\t# pop |f0| and |g0|\n\tmov\t8*1(%rsp), $g1\n\tlea\t-8*6($out_ptr),$out_ptr\t# pointer to destination |a|\n\tcall\t__smulq_384_n_shift_by_30\n\n\tmov\t8*2(%rsp), %ecx\t\t# re-load loop counter\n\txor\t\\$128, $in_ptr\t\t# flip-flop pointer to source |a|b|\n\n\tand\t8*6($out_ptr), @acc[6]\t# if |a| was negative, adjust |L|\n\tshr\t\\$1, @acc[6]\n\tadd\t@acc[6], $L\n\n\tsub\t\\$1, %ecx\n\tjnz\t.Loop_is_square\n\n\t################################# last iteration\n\t#call\t__ab_approximation_30\t# |a| and |b| are exact, just load\n\t#mov\t8*0($in_ptr), @acc[0]\t# |a_|\n\tmov\t8*6($in_ptr), @acc[1]\t# |b_|\n\tcall\t__inner_loop_48\t\t# 48 is 768%30+30\n\n\tmov\t\\$1, %rax\n\tand\t$L,  %rax\n\txor\t\\$1, %rax\t\t# return value\n\n\tlea\t$frame(%rsp), %r8\t# size 
optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tct_is_square_mod_384,.-ct_is_square_mod_384\n\n.type\t__smulq_384_n_shift_by_30,\\@abi-omnipotent\n.align\t32\n__smulq_384_n_shift_by_30:\n___\nfor($j=0; $j<2; $j++) {\n$code.=<<___;\n\tmov\t8*0($in_ptr), @acc[0]\t# load |a| (or |b|)\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\n\tmov\t%rdx, %rbx\t\t# |f1| (or |g1|)\n\tsar\t\\$63, %rdx\t\t# |f1|'s sign as mask (or |g1|'s)\n\txor\t%rax, %rax\n\tsub\t%rdx, %rax\t\t# |f1|'s sign as bit (or |g1|'s)\n\n\txor\t%rdx, %rbx\t\t# conditionally negate |f1| (or |g1|)\n\tadd\t%rax, %rbx\n\n\txor\t%rdx, @acc[0]\t\t# conditionally negate |a| (or |b|)\n\txor\t%rdx, @acc[1]\n\txor\t%rdx, @acc[2]\n\txor\t%rdx, @acc[3]\n\txor\t%rdx, @acc[4]\n\txor\t%rdx, @acc[5]\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmov\t%rdx, @acc[6+$j]\n\tand\t%rbx, @acc[6+$j]\n\tmulq\t%rbx\t\t\t# |a|*|f1| (or |b|*|g1|)\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<5; $i++) {\n$code.=<<___;\n\tmulq\t%rbx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\tneg\t@acc[6+$j]\n\tmulq\t%rbx\n\tadd\t%rax, @acc[5]\n\tadc\t%rdx, @acc[6+$j]\n___\n$code.=<<___\tif ($j==0);\n\tlea\t8*6($in_ptr), $in_ptr\t# pointer to |b|\n\tmov\t$g1, %rdx\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 
8*5($out_ptr)\n___\n}\n$code.=<<___;\n\tlea\t-8*6($in_ptr), $in_ptr\t# restore original in_ptr\n\n\tadd\t8*0($out_ptr), @acc[0]\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t8*5($out_ptr), @acc[5]\n\tadc\t@acc[7],       @acc[6]\n\n\tshrd\t\\$30, @acc[1], @acc[0]\n\tshrd\t\\$30, @acc[2], @acc[1]\n\tshrd\t\\$30, @acc[3], @acc[2]\n\tshrd\t\\$30, @acc[4], @acc[3]\n\tshrd\t\\$30, @acc[5], @acc[4]\n\tshrd\t\\$30, @acc[6], @acc[5]\n\n\tsar\t\\$63, @acc[6]\t\t# sign as mask\n\txor\t%rbx, %rbx\n\tsub\t@acc[6], %rbx\t\t# sign as bit\n\n\txor\t@acc[6], @acc[0]\t# conditionally negate the result\n\txor\t@acc[6], @acc[1]\n\txor\t@acc[6], @acc[2]\n\txor\t@acc[6], @acc[3]\n\txor\t@acc[6], @acc[4]\n\txor\t@acc[6], @acc[5]\n\tadd\t%rbx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\n\tret\n.size\t__smulq_384_n_shift_by_30,.-__smulq_384_n_shift_by_30\n___\n{\nmy ($a_, $b_) = @acc[0..1];\nmy ($t0, $t1, $t2, $t3, $t4, $t5) = map(\"%r$_\",(10..15));\nmy ($fg0, $fg1, $bias) = ($g0, $g1, $t5);\nmy $cnt = \"%edi\";\n{\nmy @a = @acc[0..5];\nmy @b = (@a[1..3], $t4, $t5, $g0);\n\n$code.=<<___;\n.type\t__ab_approximation_30,\\@abi-omnipotent\n.align\t32\n__ab_approximation_30:\n\tmov\t8*11($in_ptr), @b[5]\t# load |b| in reverse order\n\tmov\t8*10($in_ptr), @b[4]\n\tmov\t8*9($in_ptr),  @b[3]\n\n\tmov\t@a[5], %rax\n\tor\t@b[5], %rax\t\t# check top-most limbs, ...\n\tcmovz\t@a[4], @a[5]\n\tcmovz\t@b[4], @b[5]\n\tcmovz\t@a[3], @a[4]\n\tmov\t8*8($in_ptr), @b[2]\n\tcmovz\t@b[3], @b[4]\n\n\tmov\t@a[5], %rax\n\tor\t@b[5], %rax\t\t# ... 
ones before top-most, ...\n\tcmovz\t@a[4], @a[5]\n\tcmovz\t@b[4], @b[5]\n\tcmovz\t@a[2], @a[4]\n\tmov\t8*7($in_ptr), @b[1]\n\tcmovz\t@b[2], @b[4]\n\n\tmov\t@a[5], %rax\n\tor\t@b[5], %rax\t\t# ... and ones before that ...\n\tcmovz\t@a[4], @a[5]\n\tcmovz\t@b[4], @b[5]\n\tcmovz\t@a[1], @a[4]\n\tmov\t8*6($in_ptr), @b[0]\n\tcmovz\t@b[1], @b[4]\n\n\tmov\t@a[5], %rax\n\tor\t@b[5], %rax\t\t# ... and ones before that ...\n\tcmovz\t@a[4], @a[5]\n\tcmovz\t@b[4], @b[5]\n\tcmovz\t@a[0], @a[4]\n\tcmovz\t@b[0], @b[4]\n\n\tmov\t@a[5], %rax\n\tor\t@b[5], %rax\n\tbsr\t%rax, %rcx\n\tlea\t1(%rcx), %rcx\n\tcmovz\t@a[0], @a[5]\n\tcmovz\t@b[0], @b[5]\n\tcmovz\t%rax, %rcx\n\tneg\t%rcx\n\t#and\t\\$63, %rcx\t\t# debugging artefact\n\n\tshldq\t%cl, @a[4], @a[5]\t# align second limb to the left\n\tshldq\t%cl, @b[4], @b[5]\n\n\tmov\t\\$0xFFFFFFFF00000000, %rax\n\tmov\t@a[0]d, ${a_}d\n\tmov\t@b[0]d, ${b_}d\n\tand\t%rax, @a[5]\n\tand\t%rax, @b[5]\n\tor\t@a[5], ${a_}\n\tor\t@b[5], ${b_}\n\n\tjmp\t__inner_loop_30\n\n\tret\n.size\t__ab_approximation_30,.-__ab_approximation_30\n___\n}\n$code.=<<___;\n.type\t__inner_loop_30,\\@abi-omnipotent\n.align\t32\n__inner_loop_30:\t\t################# by Thomas Pornin\n\tmov\t\\$0x7FFFFFFF80000000, $fg0\t# |f0|=1, |g0|=0\n\tmov\t\\$0x800000007FFFFFFF, $fg1\t# |f1|=0, |g1|=1\n\tlea\t-1($fg0), $bias\t\t\t# 0x7FFFFFFF7FFFFFFF\n\tmov\t\\$30, $cnt\n\n.Loop_30:\n\t mov\t$a_, %rax\n\t and\t$b_, %rax\n\t shr\t\\$1, %rax\t\t# (a_ & b_) >> 1\n\n\tcmp\t$b_, $a_\t\t# if |a_|<|b_|, swap the variables\n\tmov\t$a_, $t0\n\tmov\t$b_, $t1\n\t lea\t(%rax,$L), %rax\t\t# pre-\"negate\" |L|\n\tmov\t$fg0, $t2\n\tmov\t$fg1, $t3\n\t mov\t$L,   $t4\n\tcmovb\t$b_, $a_\n\tcmovb\t$t0, $b_\n\tcmovb\t$fg1, $fg0\n\tcmovb\t$t2, $fg1\n\t cmovb\t%rax, $L\n\n\tsub\t$b_, $a_\t\t# |a_|-|b_|\n\tsub\t$fg1, $fg0\t\t# |f0|-|f1|, |g0|-|g1|\n\tadd\t$bias, $fg0\n\n\ttest\t\\$1, $t0\t\t# if |a_| was even, roll back \n\tcmovz\t$t0, $a_\n\tcmovz\t$t1, $b_\n\tcmovz\t$t2, $fg0\n\tcmovz\t$t3, 
$fg1\n\tcmovz\t$t4, $L\n\n\t lea\t2($b_), %rax\n\tshr\t\\$1, $a_\t\t# |a_|>>=1\n\t shr\t\\$2, %rax\n\tadd\t$fg1, $fg1\t\t# |f1|<<=1, |g1|<<=1\n\t lea\t(%rax,$L), $L\t\t# \"negate\" |L| if |b|%8 is 3 or 5\n\tsub\t$bias, $fg1\n\n\tsub\t\\$1, $cnt\n\tjnz\t.Loop_30\n\n\tshr\t\\$32, $bias\n\tmov\t%ebx, %eax\t\t# $fg0 -> $f0\n\tshr\t\\$32, $g0\n\tmov\t%ecx, %edx\t\t# $fg1 -> $f1\n\tshr\t\\$32, $g1\n\tsub\t$bias, $f0\t\t# remove the bias\n\tsub\t$bias, $g0\n\tsub\t$bias, $f1\n\tsub\t$bias, $g1\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=$a_\n.size\t__inner_loop_30,.-__inner_loop_30\n\n.type\t__inner_loop_48,\\@abi-omnipotent\n.align\t32\n__inner_loop_48:\n\tmov\t\\$48, $cnt\t\t# 48 is 768%30+30\n\n.Loop_48:\n\t mov\t$a_, %rax\n\t and\t$b_, %rax\n\t shr\t\\$1, %rax\t\t# (a_ & b_) >> 1\n\n\tcmp\t$b_, $a_\t\t# if |a_|<|b_|, swap the variables\n\tmov\t$a_, $t0\n\tmov\t$b_, $t1\n\t lea\t(%rax,$L), %rax\n\t mov\t$L,  $t2\n\tcmovb\t$b_, $a_\n\tcmovb\t$t0, $b_\n\t cmovb\t%rax, $L\n\n\tsub\t$b_, $a_\t\t# |a_|-|b_|\n\n\ttest\t\\$1, $t0\t\t# if |a_| was even, roll back \n\tcmovz\t$t0, $a_\n\tcmovz\t$t1, $b_\n\tcmovz\t$t2, $L\n\n\t lea\t2($b_), %rax\n\tshr\t\\$1, $a_\t\t# |a_|>>=1\n\t shr\t\\$2, %rax\n\t add\t%rax, $L\t\t# \"negate\" |L| if |b|%8 is 3 or 5\n\n\tsub\t\\$1, $cnt\n\tjnz\t.Loop_48\n\n\tret\n.size\t__inner_loop_48,.-__inner_loop_48\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ctq_inverse_mod_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast Euclidean inversion as suggested in\n# https://eprint.iacr.org/2020/972. Performance is >5x better than\n# modulus-specific FLT addition chain...\n#\n# void ct_inverse_mod_384(vec768 ret, const vec384 inp, const vec384 mod);\n#\n$python_ref.=<<'___';\ndef ct_inverse_mod_384(inp, mod):\n    a, u = inp, 1\n    b, v = mod, 0\n\n    k = 62\n    w = 64\n    mask = (1 << w) - 1\n\n    for i in range(0, 768 // k):\n        # __ab_approximation_62\n        n = max(a.bit_length(), b.bit_length())\n        if n < 128:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-w)) << w)\n            b_ = (b & mask) | ((b >> (n-w)) << w)\n\n        # __inner_loop_62\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n\n        # __smulq_384_n_shift_by_62\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if a < 0:\n            a, f0, g0 = -a, -f0, -g0\n        if b < 0:\n            b, f1, g1 = -b, -f1, -g1\n\n        # __smulq_768x63\n        u, v = u*f0 + v*g0, u*f1 + v*g1\n\n    if 768 % k:\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, 768 % k):\n            if a & 1:\n                if a < b:\n                    a, b, f0, g0, f1, g1 = b, a, f1, g1, f0, g0\n                a, f0, g0 = a-b, f0-f1, g0-g1\n            a, f1, g1 = a >> 1, f1 << 1, g1 << 1\n\n        v = u*f1 + v*g1\n\n    mod <<= 768 - mod.bit_length()  # align to the left\n    if v < 0:\n        v += mod\n    if v < 0:\n        v += mod\n    elif v == 1<<768:\n        v -= mod\n\n    return v & (2**768 
- 1) # to be reduced % mod\n___\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.extern\tct_inverse_mod_384\\$1\n___\n\nmy ($out_ptr, $in_ptr, $n_ptr, $nx_ptr) = (\"%rdi\", \"%rsi\", \"%rdx\", \"%rcx\");\nmy @acc=(map(\"%r$_\",(8..15)), \"%rbx\", \"%rbp\", $in_ptr, $out_ptr);\nmy ($f0, $g0, $f1, $g1) = (\"%rdx\",\"%rcx\",\"%r12\",\"%r13\");\nmy $cnt = \"%edi\";\n\n$frame = 8*11+2*512;\n\n$code.=<<___;\n.comm\t__blst_platform_cap,4\n.text\n\n.globl\tct_inverse_mod_384\n.hidden\tct_inverse_mod_384\n.type\tct_inverse_mod_384,\\@function,4,\"unwind\"\n.align\t32\nct_inverse_mod_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tct_inverse_mod_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t8*11+511(%rsp), %rax\t# find closest 512-byte-aligned spot\n\tand\t\\$-512, %rax\t\t# in the frame...\n\tmov\t$out_ptr, 8*4(%rsp)\n\tmov\t$nx_ptr, 8*5(%rsp)\n\n\tmov\t8*0($in_ptr), @acc[0]\t# load input\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\n\tmov\t8*0($n_ptr), @acc[6]\t# load modulus\n\tmov\t8*1($n_ptr), @acc[7]\n\tmov\t8*2($n_ptr), @acc[8]\n\tmov\t8*3($n_ptr), @acc[9]\n\tmov\t8*4($n_ptr), 
@acc[10]\n\tmov\t8*5($n_ptr), @acc[11]\n\n\tmov\t@acc[0], 8*0(%rax)\t# copy input to |a|\n\tmov\t@acc[1], 8*1(%rax)\n\tmov\t@acc[2], 8*2(%rax)\n\tmov\t@acc[3], 8*3(%rax)\n\tmov\t@acc[4], 8*4(%rax)\n\tmov\t@acc[5], 8*5(%rax)\n\n\tmov\t@acc[6], 8*6(%rax)\t# copy modulus to |b|\n\tmov\t@acc[7], 8*7(%rax)\n\tmov\t@acc[8], 8*8(%rax)\n\tmov\t@acc[9], 8*9(%rax)\n\tmov\t@acc[10], 8*10(%rax)\n\tmov\t%rax, $in_ptr\t\t# pointer to source |a|b|1|0|\n\tmov\t@acc[11], 8*11(%rax)\n\n\t################################# first iteration\n\tmov\t\\$62, $cnt\n\tcall\t__ab_approximation_62\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_384_n_shift_by_62\n\t#mov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\t#mov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\tmov\t$f0, 8*12($out_ptr)\t# initialize |u| with |f0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_384_n_shift_by_62\n\t#mov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*10(%rsp)\t\t# corrected |g1|\n\tmov\t$f0, 8*13($out_ptr)\t# initialize |v| with |f1|\n\n\t################################# second iteration\n\txor\t\\$256, $in_ptr\t\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$62, $cnt\n\tcall\t__ab_approximation_62\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_384_n_shift_by_62\n\t#mov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*10(%rsp)\t\t# corrected 
|g1|\n\n\tmov\t8*12($in_ptr), %rax\t# |u|\n\tmov\t8*19($in_ptr), @acc[3]\t# |v|\n\tmov\t$f0, %rbx\n\tmov\t%rax, @acc[2]\n\timulq\t8*7(%rsp)\t\t# |u|*|f0|\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[3], %rax\n\tmov\t%rdx, @acc[1]\n\timulq\t8*8(%rsp)\t\t# |v|*|g0|\n\tadd\t%rax, @acc[0]\n\tadc\t%rdx, @acc[1]\n\tmov\t@acc[0], 8*6($out_ptr)\t# destination |u|\n\tmov\t@acc[1], 8*7($out_ptr)\n\tsar\t\\$63, @acc[1]\t\t# sign extension\n\tmov\t@acc[1], 8*8($out_ptr)\n\tmov\t@acc[1], 8*9($out_ptr)\n\tmov\t@acc[1], 8*10($out_ptr)\n\tmov\t@acc[1], 8*11($out_ptr)\n\tmov\t@acc[1], 8*12($out_ptr)\n\tlea\t8*12($in_ptr),$in_ptr\t# make in_ptr \"rewindable\" with xor\n\n\tmov\t@acc[2], %rax\n\timulq\t%rbx\t\t\t# |u|*|f1|\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[3], %rax\n\tmov\t%rdx, @acc[1]\n\timulq\t%rcx\t\t\t# |v|*|g1|\n\tadd\t%rax, @acc[0]\n\tadc\t%rdx, @acc[1]\n\tmov\t@acc[0], 8*13($out_ptr)\t# destination |v|\n\tmov\t@acc[1], 8*14($out_ptr)\n\tsar\t\\$63, @acc[1]\t\t# sign extension\n\tmov\t@acc[1], 8*15($out_ptr)\n\tmov\t@acc[1], 8*16($out_ptr)\n\tmov\t@acc[1], 8*17($out_ptr)\n\tmov\t@acc[1], 8*18($out_ptr)\n\tmov\t@acc[1], 8*19($out_ptr)\n___\nfor($i=2; $i<11; $i++) {\nmy $smul_768x63  = $i>5 ? 
\"__smulq_768x63\"\n                        : \"__smulq_384x63\";\n$code.=<<___;\n\txor\t\\$256+8*12, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$62, $cnt\n\tcall\t__ab_approximation_62\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulq_384_n_shift_by_62\n\tmov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\tmov\t$g0, 8*10(%rsp)\t\t# corrected |g1|\n\n\tmov\t8*7(%rsp), $f0\t\t# |f0|\n\tmov\t8*8(%rsp), $g0\t\t# |g0|\n\tlea\t8*12($in_ptr), $in_ptr\t# pointer to source |u|v|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |u|\n\tcall\t__smulq_384x63\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*7($out_ptr),$out_ptr\t# pointer to destination |v|\n\tcall\t$smul_768x63\n___\n$code.=<<___\tif ($i==5);\n\tmov\t@acc[6], 8*7($out_ptr)\t# sign extension\n\tmov\t@acc[6], 8*8($out_ptr)\n\tmov\t@acc[6], 8*9($out_ptr)\n\tmov\t@acc[6], 8*10($out_ptr)\n\tmov\t@acc[6], 8*11($out_ptr)\n___\n}\n$code.=<<___;\n\t################################# iteration before last\n\txor\t\\$256+8*12, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$62, $cnt\n\t#call\t__ab_approximation_62\t# |a| and |b| are exact, just load\n\tmov\t8*0($in_ptr), @acc[0]\t# |a_lo|\n\tmov\t8*1($in_ptr), @acc[1]\t# |a_hi|\n\tmov\t8*6($in_ptr), @acc[2]\t# |b_lo|\n\tmov\t8*7($in_ptr), @acc[3]\t# |b_hi|\n\tcall\t__inner_loop_62\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[2], 
8*6($out_ptr)\n\n\t#mov\t8*7(%rsp), $f0\t\t# |f0|\n\t#mov\t8*8(%rsp), $g0\t\t# |g0|\n\tlea\t8*12($in_ptr), $in_ptr\t# pointer to source |u|v|\n\tlea\t8*12($out_ptr),$out_ptr\t# pointer to destination |u|\n\tcall\t__smulq_384x63\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*7($out_ptr),$out_ptr\t# pointer to destination |v|\n\tcall\t__smulq_768x63\n\n\t################################# last iteration\n\txor\t\\$256+8*12, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$24, $cnt\t\t# 768 % 62\n\t#call\t__ab_approximation_62\t# |a| and |b| are exact, just load\n\tmov\t8*0($in_ptr), @acc[0]\t# |a_lo|\n\txor\t@acc[1],      @acc[1]\t# |a_hi|\n\tmov\t8*6($in_ptr), @acc[2]\t# |b_lo|\n\txor\t@acc[3],   @acc[3]\t# |b_hi|\n\tcall\t__inner_loop_62\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\t#mov\t$f1, 8*9(%rsp)\n\t#mov\t$g1, 8*10(%rsp)\n\n\t#mov\t8*7(%rsp), $f0\t\t# |f0|\n\t#mov\t8*8(%rsp), $g0\t\t# |g0|\n\tlea\t8*12($in_ptr), $in_ptr\t# pointer to source |u|v|\n\t#lea\t8*6($out_ptr), $out_ptr\t# pointer to destination |u|\n\t#call\t__smulq_384x63\n\n\t#mov\t8*9(%rsp), $f0\t\t# |f1|\n\t#mov\t8*10(%rsp), $g0\t\t# |g1|\n\tmov\t$f1, $f0\n\tmov\t$g1, $g0\n\tmov\t8*4(%rsp), $out_ptr\t# original out_ptr\n\tcall\t__smulq_768x63\n\n\tmov\t8*5(%rsp), $in_ptr\t# original n_ptr\n\tmov\t%rdx, @acc[5]\t\t# the excess limb, -1, 0, or 1\n\tsar\t\\$63, @acc[5]\t\t# result's sign as mask\n\n\tmov\t@acc[5], @acc[0]\t# mask |modulus|\n\tmov\t@acc[5], @acc[1]\n\tmov\t@acc[5], @acc[2]\n\tand\t8*0($in_ptr), @acc[0]\n\tand\t8*1($in_ptr), @acc[1]\n\tmov\t@acc[5], @acc[3]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), @acc[3]\n\tmov\t@acc[5], @acc[4]\n\tand\t8*4($in_ptr), @acc[4]\n\tand\t8*5($in_ptr), @acc[5]\n\n\tadd\t@acc[0], @acc[6]\t# conditionally add |modulus|<<384\n\tadc\t@acc[1], @acc[7]\n\tadc\t@acc[2], @acc[8]\n\tadc\t@acc[3], @acc[9]\n\tadc\t@acc[4], %rcx\n\tadc\t@acc[5], %rax\n\tadc\t\\$0, %rdx\n\n\tmov\t%rdx, 
@acc[5]\n\tneg\t%rdx\n\tor\t%rdx, @acc[5]\t\t# excess bit or sign as mask\n\tsar\t\\$63, %rdx\t\t# excess bit as mask\n\n\tmov\t@acc[5], @acc[0]\t# mask |modulus|\n\tmov\t@acc[5], @acc[1]\n\tmov\t@acc[5], @acc[2]\n\tand\t8*0($in_ptr), @acc[0]\n\tand\t8*1($in_ptr), @acc[1]\n\tmov\t@acc[5], @acc[3]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), @acc[3]\n\tmov\t@acc[5], @acc[4]\n\tand\t8*4($in_ptr), @acc[4]\n\tand\t8*5($in_ptr), @acc[5]\n\n\txor\t%rdx, @acc[0]\t\t# conditionally negate |modulus|\n\txor\t$in_ptr, $in_ptr\n\txor\t%rdx, @acc[1]\n\tsub\t%rdx, $in_ptr\n\txor\t%rdx, @acc[2]\n\txor\t%rdx, @acc[3]\n\txor\t%rdx, @acc[4]\n\txor\t%rdx, @acc[5]\n\tadd\t$in_ptr, @acc[0]\n\tadc\t\\$0,  @acc[1]\n\tadc\t\\$0,  @acc[2]\n\tadc\t\\$0,  @acc[3]\n\tadc\t\\$0,  @acc[4]\n\tadc\t\\$0,  @acc[5]\n\n\tadd\t@acc[0], @acc[6]\t# final adjustment for |modulus|<<384\n\tadc\t@acc[1], @acc[7]\n\tadc\t@acc[2], @acc[8]\n\tadc\t@acc[3], @acc[9]\n\tadc\t@acc[4], %rcx\n\tadc\t@acc[5], %rax\n\n\tmov\t@acc[6], 8*6($out_ptr)\t# store absolute value\n\tmov\t@acc[7], 8*7($out_ptr)\n\tmov\t@acc[8], 8*8($out_ptr)\n\tmov\t@acc[9], 8*9($out_ptr)\n\tmov\t%rcx,    8*10($out_ptr)\n\tmov\t%rax,    8*11($out_ptr)\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tct_inverse_mod_384,.-ct_inverse_mod_384\n___\n########################################################################\n# see corresponding commentary in ctx_inverse_mod_384-x86_64...\n{\nmy ($out_ptr, $in_ptr, $f0, $g0) = (\"%rdi\", \"%rsi\", \"%rdx\", \"%rcx\");\nmy @acc = map(\"%r$_\",(8..15),\"bx\",\"bp\",\"cx\",\"di\");\nmy $fx = 
@acc[9];\n\n$code.=<<___;\n.type\t__smulq_768x63,\\@abi-omnipotent\n.align\t32\n__smulq_768x63:\n\tmov\t8*0($in_ptr), @acc[0]\t# load |u|\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\tmov\t8*6($in_ptr), @acc[6]\t# sign limb\n\n\tmov\t$f0, $fx\n\tsar\t\\$63, $f0\t\t# |f0|'s sign as mask\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |f0|'s sign as bit\n\n\tmov\t$out_ptr, 8*1(%rsp)\n\tmov\t$in_ptr, 8*2(%rsp)\n\tlea\t8*7($in_ptr), $in_ptr\t# pointer to |v|\n\n\txor\t$f0, $fx\t\t# conditionally negate |f0|\n\tadd\t%rax, $fx\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |u|\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\txor\t$f0, @acc[4]\n\txor\t$f0, @acc[5]\n\txor\t$f0, @acc[6]\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\tmulq\t$fx\t\t\t# |u|*|f0|\n\tmov\t%rax, 8*0($out_ptr)\t# offload |u|*|f0|\n\tmov\t@acc[1], %rax\n\tand\t$fx, @acc[6]\n\tneg\t@acc[6]\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<5; $i++) {\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n\tmov\t@acc[$i], 8*$i($out_ptr)\n___\n}\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tadc\t%rdx, @acc[6]\n\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tsar\t\\$63, @acc[6]\t\t# sign extension\n\tmov\t@acc[6], 8*7($out_ptr)\n___\n{\nmy $fx=$in_ptr;\n$code.=<<___;\n\tmov\t$g0, $f0\t\t# load |g0|\n\n\tmov\t8*0($in_ptr), @acc[0]\t# load |v|\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\tmov\t8*6($in_ptr), @acc[6]\n\tmov\t8*7($in_ptr), @acc[7]\n\tmov\t8*8($in_ptr), @acc[8]\n\tmov\t8*9($in_ptr), @acc[9]\n\tmov\t8*10($in_ptr), @acc[10]\n\tmov\t8*11($in_ptr), @acc[11]\n\n\tmov\t$f0, $fx\t\t# 
overrides in_ptr\n\tsar\t\\$63, $f0\t\t# |g0|'s sign as mask\n\txor\t%rax, %rax\n\tsub\t$f0, %rax\t\t# |g0|'s sign as bit\n\n\txor\t$f0, $fx\t\t# conditionally negate |g0|\n\tadd\t%rax, $fx\n\n\txor\t$f0, @acc[0]\t\t# conditionally negate |v|\n\txor\t$f0, @acc[1]\n\txor\t$f0, @acc[2]\n\txor\t$f0, @acc[3]\n\txor\t$f0, @acc[4]\n\txor\t$f0, @acc[5]\n\txor\t$f0, @acc[6]\n\txor\t$f0, @acc[7]\n\txor\t$f0, @acc[8]\n\txor\t$f0, @acc[9]\n\txor\t$f0, @acc[10]\n\txor\t$f0, @acc[11]\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\tadc\t\\$0, @acc[7]\n\tadc\t\\$0, @acc[8]\n\tadc\t\\$0, @acc[9]\n\tadc\t\\$0, @acc[10]\n\tadc\t\\$0, @acc[11]\n\n\tmulq\t$fx\t\t\t# |v|*|g0|\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<11; $i++) {\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___;\n\timulq\t$fx\n\tmov\t8*1(%rsp), $in_ptr\t# borrow for out_ptr\n\tadd\t@acc[11], %rax\n\tadc\t\\$0, %rdx\t\t# used in the final step\n\n\tadd\t8*0($in_ptr), @acc[0]\t# accumulate |u|*|f0|\n\tadc\t8*1($in_ptr), @acc[1]\n\tadc\t8*2($in_ptr), @acc[2]\n\tadc\t8*3($in_ptr), @acc[3]\n\tadc\t8*4($in_ptr), @acc[4]\n\tadc\t8*5($in_ptr), @acc[5]\n\tadc\t8*6($in_ptr), @acc[6]\n\tmov\t8*7($in_ptr), @acc[11]\t# sign extension\n\tadc\t@acc[11], @acc[7]\n\tadc\t@acc[11], @acc[8]\n\tadc\t@acc[11], @acc[9]\n\tadc\t@acc[11], @acc[10]\n\tadc\t@acc[11], %rax\n\tadc\t@acc[11], %rdx\n\n\tlea\t($in_ptr), $out_ptr\t# restore original out_ptr\n\tmov\t8*2(%rsp), $in_ptr\t# restore original in_ptr\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tmov\t@acc[7], 8*7($out_ptr)\n\tmov\t@acc[8], 8*8($out_ptr)\n\tmov\t@acc[9], 
8*9($out_ptr)\n\tmov\t@acc[10], 8*10($out_ptr)\n\tmov\t%rax,     8*11($out_ptr)\n\n\tret\n.size\t__smulq_768x63,.-__smulq_768x63\n___\n}\n$code.=<<___;\n.type\t__smulq_384x63,\\@abi-omnipotent\n.align\t32\n__smulq_384x63:\n___\nfor($j=0; $j<2; $j++) {\n$code.=<<___;\n\tmov\t8*0($in_ptr), @acc[0]\t# load |u| (or |v|)\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\tmov\t8*6($in_ptr), @acc[6]\t# sign/excess limb\n\n\tmov\t%rdx, $fx\n\tsar\t\\$63, %rdx\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t%rax, %rax\n\tsub\t%rdx, %rax\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t%rdx, $fx\t\t# conditionally negate |f0|\n\tadd\t%rax, $fx\n\n\txor\t%rdx, @acc[0]\t\t# conditionally negate |u| (or |v|)\n\txor\t%rdx, @acc[1]\n\txor\t%rdx, @acc[2]\n\txor\t%rdx, @acc[3]\n\txor\t%rdx, @acc[4]\n\txor\t%rdx, @acc[5]\n\txor\t%rdx, @acc[6]\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\tmulq\t$fx\t\t\t# |u|*|f0| (or |v|*|g0|)\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tand\t$fx, @acc[6]\n\tneg\t@acc[6]\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<5; $i++) {\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___\tif ($j==0);\n\tmulq\t$fx\n\tadd\t%rax, @acc[5]\n\tadc\t%rdx, @acc[6]\n\n\tlea\t8*7($in_ptr), $in_ptr\t# pointer to |v|\n\tmov\t$g0, %rdx\n\n\tmov\t@acc[0], 8*0($out_ptr)\t# offload |u|*|f0|\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], @acc[7]\n\tmov\t@acc[6], @acc[8]\n___\n}\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[5]\n\tadc\t%rdx, @acc[6]\n\n\tlea\t-8*7($in_ptr), $in_ptr\t# restore original in_ptr\n\n\tadd\t8*0($out_ptr), @acc[0]\t# accumulate |u|*|f0|\n\tadc\t8*1($out_ptr), 
@acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t@acc[7],       @acc[5]\n\tadc\t@acc[8],       @acc[6]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\n\tret\n.size\t__smulq_384x63,.-__smulq_384x63\n___\n{\n$code.=<<___;\n.type\t__smulq_384_n_shift_by_62,\\@abi-omnipotent\n.align\t32\n__smulq_384_n_shift_by_62:\n\tmov\t$f0, @acc[8]\n___\nmy $f0 = @acc[8];\nfor($j=0; $j<2; $j++) {\nmy $top = $j==0 ? @acc[6] : @acc[7];\n$code.=<<___;\n\tmov\t8*0($in_ptr), @acc[0]\t# load |a| (or |b|)\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\n\tmov\t%rdx, $fx\n\tsar\t\\$63, %rdx\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t%rax, %rax\n\tsub\t%rdx, %rax\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t%rdx, $fx\t\t# conditionally negate |f0| (or |g0|)\n\tadd\t%rax, $fx\n\n\txor\t%rdx, @acc[0]\t\t# conditionally negate |a| (or |b|)\n\txor\t%rdx, @acc[1]\n\txor\t%rdx, @acc[2]\n\txor\t%rdx, @acc[3]\n\txor\t%rdx, @acc[4]\n\txor\t%rdx, @acc[5]\n\tmov\t%rdx, $top\n\tadd\t@acc[0], %rax\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmulq\t$fx\t\t\t# |a|*|f0| (or |b|*|g0|)\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[1], %rax\n\tand\t$fx, $top\n\tneg\t$top\n\tmov\t%rdx, @acc[1]\n___\nfor($i=1; $i<5; $i++) {\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tmov\t@acc[$i+1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[$i+1]\n___\n}\n$code.=<<___\tif ($j==0);\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tadc\t%rdx, $top\n\n\tlea\t8*6($in_ptr), $in_ptr\t# pointer to |b|\n\tmov\t$g0, %rdx\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 
8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n___\n}\n$code.=<<___;\n\tmulq\t$fx\n\tadd\t%rax, @acc[$i]\n\tadc\t%rdx, @acc[7]\n\n\tlea\t-8*6($in_ptr), $in_ptr\t# restore original in_ptr\n\tmov\t$f0, %rdx\n\n\tadd\t8*0($out_ptr), @acc[0]\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t8*5($out_ptr), @acc[5]\n\tadc\t@acc[7],       @acc[6]\n\n\tshrd\t\\$62, @acc[1], @acc[0]\n\tshrd\t\\$62, @acc[2], @acc[1]\n\tshrd\t\\$62, @acc[3], @acc[2]\n\tshrd\t\\$62, @acc[4], @acc[3]\n\tshrd\t\\$62, @acc[5], @acc[4]\n\tshrd\t\\$62, @acc[6], @acc[5]\n\n\tsar\t\\$63, @acc[6]\t\t# sign as mask\n\txor\t$fx, $fx\n\tsub\t@acc[6], $fx\t\t# sign as bit\n\n\txor\t@acc[6], @acc[0]\t# conditionally negate the result\n\txor\t@acc[6], @acc[1]\n\txor\t@acc[6], @acc[2]\n\txor\t@acc[6], @acc[3]\n\txor\t@acc[6], @acc[4]\n\txor\t@acc[6], @acc[5]\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\n\txor\t@acc[6], %rdx\t\t# conditionally negate |f0|\n\txor\t@acc[6], $g0\t\t# conditionally negate |g0|\n\tadd\t$fx, %rdx\n\tadd\t$fx, $g0\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulq_384_n_shift_by_62,.-__smulq_384_n_shift_by_62\n___\n} }\n\n{\nmy ($a_lo, $a_hi, $b_lo, $b_hi) = map(\"%r$_\",(8..11));\nmy ($t0, $t1, $t2, $t3, $t4, $t5) = (\"%rax\",\"%rbx\",\"%rbp\",\"%r14\",\"%r15\",\"%rsi\");\n{\nmy @a = ($a_lo, $t1, $a_hi);\nmy @b = ($b_lo, $t2, $b_hi);\n\n$code.=<<___;\n.type\t__ab_approximation_62,\\@abi-omnipotent\n.align\t32\n__ab_approximation_62:\n\tmov\t8*5($in_ptr), @a[2]\t# load |a| in reverse order\n\tmov\t8*11($in_ptr), @b[2]\t# load |b| in reverse order\n\tmov\t8*4($in_ptr), 
@a[1]\n\tmov\t8*10($in_ptr), @b[1]\n\tmov\t8*3($in_ptr), @a[0]\n\tmov\t8*9($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# check top-most limbs, ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*2($in_ptr), @a[0]\n\tmov\t8*8($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# ... ones before top-most, ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*1($in_ptr), @a[0]\n\tmov\t8*7($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# ... and ones before that ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*0($in_ptr), @a[0]\n\tmov\t8*6($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\n\tbsr\t$t0, %rcx\n\tlea\t1(%rcx), %rcx\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t$t0, %rcx\n\tneg\t%rcx\n\t#and\t\\$63, %rcx\t\t# debugging artefact\n\n\tshldq\t%cl, @a[1], @a[2]\t# align second limb to the left\n\tshldq\t%cl, @b[1], @b[2]\n\n\tjmp\t__inner_loop_62\n\n\tret\n.size\t__ab_approximation_62,.-__ab_approximation_62\n___\n}\n$code.=<<___;\n.type\t__inner_loop_62,\\@abi-omnipotent\n.align\t8\n.long\t0\n__inner_loop_62:\n\tmov\t\\$1, $f0\t# |f0|=1\n\txor\t$g0, $g0\t# |g0|=0\n\txor\t$f1, $f1\t# |f1|=0\n\tmov\t\\$1, $g1\t# |g1|=1\n\tmov\t$in_ptr, 8(%rsp)\n\n.Loop_62:\n\txor\t$t0, $t0\n\txor\t$t1, $t1\n\ttest\t\\$1, $a_lo\t# if |a_| is odd, then we'll be subtracting |b_|\n\tmov\t$b_lo, $t2\n\tmov\t$b_hi, $t3\n\tcmovnz\t$b_lo, $t0\n\tcmovnz\t$b_hi, $t1\n\tsub\t$a_lo, $t2\t# |b_|-|a_|\n\tsbb\t$a_hi, $t3\n\tmov\t$a_lo, $t4\n\tmov\t$a_hi, $t5\n\tsub\t$t0, $a_lo\t# |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tsbb\t$t1, $a_hi\n\tcmovc\t$t2, $a_lo\t# borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcmovc\t$t3, $a_hi\n\tcmovc\t$t4, $b_lo\t# |b_| = |a_|\n\tcmovc\t$t5, $b_hi\n\tmov\t$f0, $t0\t# exchange |f0| and |f1|\n\tcmovc\t$f1, $f0\n\tcmovc\t$t0, $f1\n\tmov\t$g0, $t1\t# 
exchange |g0| and |g1|\n\tcmovc\t$g1, $g0\n\tcmovc\t$t1, $g1\n\txor\t$t0, $t0\n\txor\t$t1, $t1\n\tshrd\t\\$1, $a_hi, $a_lo\n\tshr\t\\$1, $a_hi\n\ttest\t\\$1, $t4\t# if |a_| was odd, then we'll be subtracting...\n\tcmovnz\t$f1, $t0\n\tcmovnz\t$g1, $t1\n\tadd\t$f1, $f1\t# |f1|<<=1\n\tadd\t$g1, $g1\t# |g1|<<=1\n\tsub\t$t0, $f0\t# |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\t$t1, $g0\t# |g0|-=|g1| (or |g0-=0| ...)\n\tsub\t\\$1, $cnt\n\tjnz\t.Loop_62\n\n\tmov\t8(%rsp), $in_ptr\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=$t0\n.size\t__inner_loop_62,.-__inner_loop_62\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/ctx_inverse_mod_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Both constant-time and fast Euclidean inversion as suggested in\n# https://eprint.iacr.org/2020/972. Performance is >4x better than\n# modulus-specific FLT addition chain...\n#\n# void ct_inverse_mod_384(vec768 ret, const vec384 inp, const vec384 mod);\n#\n$python_ref.=<<'___';\ndef ct_inverse_mod_384(inp, mod):\n    a, u = inp, 1\n    b, v = mod, 0\n\n    k = 31\n    mask = (1 << k) - 1\n\n    for i in range(0, 768 // k):\n        # __ab_approximation_31\n        n = max(a.bit_length(), b.bit_length())\n        if n < 64:\n            a_, b_ = a, b\n        else:\n            a_ = (a & mask) | ((a >> (n-k-2)) << k)\n            b_ = (b & mask) | ((b >> (n-k-2)) << k)\n\n        # __inner_loop_31\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, k):\n            if a_ & 1:\n                if a_ < b_:\n                    a_, b_, f0, g0, f1, g1 = b_, a_, f1, g1, f0, g0\n                a_, f0, g0 = a_-b_, f0-f1, g0-g1\n            a_, f1, g1 = a_ >> 1, f1 << 1, g1 << 1\n\n        # __smulx_384_n_shift_by_31\n        a, b = (a*f0 + b*g0) >> k, (a*f1 + b*g1) >> k\n        if a < 0:\n            a, f0, g0 = -a, -f0, -g0\n        if b < 0:\n            b, f1, g1 = -b, -f1, -g1\n\n        # __smulx_768x63\n        u, v = u*f0 + v*g0, u*f1 + v*g1\n\n    if 768 % k:\n        f0, g0, f1, g1 = 1, 0, 0, 1\n        for j in range(0, 768 % k):\n            if a & 1:\n                if a < b:\n                    a, b, f0, g0, f1, g1 = b, a, f1, g1, f0, g0\n                a, f0, g0 = a-b, f0-f1, g0-g1\n            a, f1, g1 = a >> 1, f1 << 1, g1 << 1\n\n        v = u*f1 + v*g1\n\n    mod <<= 768 - mod.bit_length()  # align to the left\n    if v < 0:\n        v += mod\n    if v < 0:\n        v += mod\n    elif v == 1<<768:\n        v -= mod\n\n    return v & (2**768 - 1) # to 
be reduced % mod\n___\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.globl\tct_inverse_mod_384\\$1\n___\n\nmy ($out_ptr, $in_ptr, $n_ptr, $nx_ptr) = (\"%rdi\", \"%rsi\", \"%rdx\", \"%rcx\");\nmy @acc=(map(\"%r$_\",(8..15)), \"%rbx\", \"%rbp\", $in_ptr, $out_ptr);\nmy ($f0, $g0, $f1, $g1) = (\"%rdx\",\"%rcx\",\"%r12\",\"%r13\");\nmy $cnt = \"%edi\";\n\n$frame = 8*11+2*512;\n\n$code.=<<___;\n.text\n\n.globl\tctx_inverse_mod_384\n.hidden\tctx_inverse_mod_384\n.type\tctx_inverse_mod_384,\\@function,4,\"unwind\"\n.align\t32\nctx_inverse_mod_384:\n.cfi_startproc\nct_inverse_mod_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t8*11+511(%rsp), %rax\t# find closest 512-byte-aligned spot\n\tand\t\\$-512, %rax\t\t# in the frame...\n\tmov\t$out_ptr, 8*4(%rsp)\n\tmov\t$nx_ptr, 8*5(%rsp)\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($in_ptr), @acc[0]\t# load input\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\n\tmov\t8*0($n_ptr), @acc[6]\t# load modulus\n\tmov\t8*1($n_ptr), @acc[7]\n\tmov\t8*2($n_ptr), @acc[8]\n\tmov\t8*3($n_ptr), @acc[9]\n\tmov\t8*4($n_ptr), @acc[10]\n\tmov\t8*5($n_ptr), @acc[11]\n\n\tmov\t@acc[0], 8*0(%rax)\t# copy 
input to |a|\n\tmov\t@acc[1], 8*1(%rax)\n\tmov\t@acc[2], 8*2(%rax)\n\tmov\t@acc[3], 8*3(%rax)\n\tmov\t@acc[4], 8*4(%rax)\n\tmov\t@acc[5], 8*5(%rax)\n\n\tmov\t@acc[6], 8*6(%rax)\t# copy modulus to |b|\n\tmov\t@acc[7], 8*7(%rax)\n\tmov\t@acc[8], 8*8(%rax)\n\tmov\t@acc[9], 8*9(%rax)\n\tmov\t@acc[10], 8*10(%rax)\n\tmov\t%rax, $in_ptr\n\tmov\t@acc[11], 8*11(%rax)\n\n\t################################# first iteration\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulx_384_n_shift_by_31\n\t#mov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\t#mov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\tmov\t$f0, 8*12($out_ptr)\t# initialize |u| with |f0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulx_384_n_shift_by_31\n\t#mov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*10(%rsp)\t\t# corrected |g1|\n\tmov\t$f0, 8*13($out_ptr)\t# initialize |v| with |f1|\n\n\t################################# second iteration\n\txor\t\\$256, $in_ptr\t\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t__smulx_384_n_shift_by_31\n\tmov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t__smulx_384_n_shift_by_31\n\t#mov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\t#mov\t$g0, 8*10(%rsp)\t\t# corrected |g1|\n\n\tmov\t8*12($in_ptr), %rax\t# |u|\n\tmov\t8*19($in_ptr), @acc[3]\t# |v|\n\tmov\t$f0, %rbx\n\tmov\t%rax, 
@acc[2]\n\timulq\t8*7(%rsp)\t\t# |u|*|f0|\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[3], %rax\n\tmov\t%rdx, @acc[1]\n\timulq\t8*8(%rsp)\t\t# |v|*|g0|\n\tadd\t%rax, @acc[0]\n\tadc\t%rdx, @acc[1]\n\tmov\t@acc[0], 8*6($out_ptr)\t# destination |u|\n\tmov\t@acc[1], 8*7($out_ptr)\n\tsar\t\\$63, @acc[1]\t\t# sign extension\n\tmov\t@acc[1], 8*8($out_ptr)\n\tmov\t@acc[1], 8*9($out_ptr)\n\tmov\t@acc[1], 8*10($out_ptr)\n\tmov\t@acc[1], 8*11($out_ptr)\n\tmov\t@acc[1], 8*12($out_ptr)\n\tlea\t8*12($in_ptr), $in_ptr\t# make in_ptr \"rewindable\" with xor\n\n\tmov\t@acc[2], %rax\n\timulq\t%rbx\t\t\t# |u|*|f1|\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[3], %rax\n\tmov\t%rdx, @acc[1]\n\timulq\t%rcx\t\t\t# |v|*|g1|\n\tadd\t%rax, @acc[0]\n\tadc\t%rdx, @acc[1]\n\tmov\t@acc[0], 8*13($out_ptr)\t# destination |v|\n\tmov\t@acc[1], 8*14($out_ptr)\n\tsar\t\\$63, @acc[1]\t\t# sign extension\n\tmov\t@acc[1], 8*15($out_ptr)\n\tmov\t@acc[1], 8*16($out_ptr)\n\tmov\t@acc[1], 8*17($out_ptr)\n\tmov\t@acc[1], 8*18($out_ptr)\n\tmov\t@acc[1], 8*19($out_ptr)\n___\nfor($i=2; $i<23; $i++) {\nmy $smul_n_shift = $i<19 ? \"__smulx_384_n_shift_by_31\"\n                         : \"__smulx_191_n_shift_by_31\";\nmy $smul_768x63  = $i>11 ? 
\"__smulx_768x63\"\n                         : \"__smulx_384x63\";\n$code.=<<___;\n\txor\t\\$256+8*12, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$31, $cnt\n\tcall\t__ab_approximation_31\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\tmov\t$f1, 8*9(%rsp)\n\tmov\t$g1, 8*10(%rsp)\n\n\tmov\t\\$256, $out_ptr\n\txor\t$in_ptr, $out_ptr\t# pointer to destination |a|b|u|v|\n\tcall\t$smul_n_shift\n\tmov\t$f0, 8*7(%rsp)\t\t# corrected |f0|\n\tmov\t$g0, 8*8(%rsp)\t\t# corrected |g0|\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |b|\n\tcall\t$smul_n_shift\n\tmov\t$f0, 8*9(%rsp)\t\t# corrected |f1|\n\tmov\t$g0, 8*10(%rsp)\t\t# corrected |g1|\n\n\tmov\t8*7(%rsp), $f0\t\t# |f0|\n\tmov\t8*8(%rsp), $g0\t\t# |g0|\n\tlea\t8*12($in_ptr), $in_ptr\t# pointer to source |u|v|\n\tlea\t8*6($out_ptr), $out_ptr\t# pointer to destination |u|\n\tcall\t__smulx_384x63\n\n\tmov\t8*9(%rsp), $f0\t\t# |f1|\n\tmov\t8*10(%rsp), $g0\t\t# |g1|\n\tlea\t8*7($out_ptr),$out_ptr\t# pointer to destination |v|\n\tcall\t$smul_768x63\n___\n$code.=<<___\tif ($i==11);\n\tmov\t@acc[6], 8*7($out_ptr)\t# sign extension\n\tmov\t@acc[6], 8*8($out_ptr)\n\tmov\t@acc[6], 8*9($out_ptr)\n\tmov\t@acc[6], 8*10($out_ptr)\n\tmov\t@acc[6], 8*11($out_ptr)\n___\n}\n$code.=<<___;\n\t################################# two[!] 
last iterations in one go\n\txor\t\\$256+8*12, $in_ptr\t# flip-flop pointer to source |a|b|u|v|\n\tmov\t\\$55, $cnt\t\t# 31 + 768 % 31\n\t#call\t__ab_approximation_31\t# |a| and |b| are exact, just load\n\tmov\t8*0($in_ptr), @acc[0]\t# |a_lo|\n\t#xor\t@acc[1],      @acc[1]\t# |a_hi|\n\tmov\t8*6($in_ptr), @acc[2]\t# |b_lo|\n\t#xor\t@acc[3],      @acc[3]\t# |b_hi|\n\tcall\t__tail_loop_55\n\t#mov\t$f0, 8*7(%rsp)\n\t#mov\t$g0, 8*8(%rsp)\n\t#mov\t$f1, 8*9(%rsp)\n\t#mov\t$g1, 8*10(%rsp)\n\n\t#mov\t8*7(%rsp), $f0\t\t# |f0|\n\t#mov\t8*8(%rsp), $g0\t\t# |g0|\n\tlea\t8*12($in_ptr), $in_ptr\t# pointer to source |u|v|\n\t#lea\t8*6($out_ptr), $out_ptr\t# pointer to destination |u|\n\t#call\t__smulx_384x63\n\n\t#mov\t8*9(%rsp), $f0\t\t# |f1|\n\t#mov\t8*10(%rsp), $g0\t\t# |g1|\n\tmov\t$f1, $f0\n\tmov\t$g1, $g0\n\tmov\t8*4(%rsp), $out_ptr\t# original out_ptr\n\tcall\t__smulx_768x63\n\n\tmov\t8*5(%rsp), $in_ptr\t# original n_ptr\n\tmov\t%rdx, @acc[5]\t\t# the excess limb, -1, 0 or 1\n\tsar\t\\$63, @acc[5]\t\t# result's sign as mask\n\n\tmov\t@acc[5], @acc[0]\t# mask |modulus|\n\tmov\t@acc[5], @acc[1]\n\tmov\t@acc[5], @acc[2]\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tand\t8*0($in_ptr), @acc[0]\n\tand\t8*1($in_ptr), @acc[1]\n\tmov\t@acc[5], @acc[3]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), @acc[3]\n\tmov\t@acc[5], @acc[4]\n\tand\t8*4($in_ptr), @acc[4]\n\tand\t8*5($in_ptr), @acc[5]\n\n\tadd\t@acc[0], @acc[6]\t# conditionally add |modulus|<<384\n\tadc\t@acc[1], @acc[7]\n\tadc\t@acc[2], @acc[8]\n\tadc\t@acc[3], @acc[9]\n\tadc\t@acc[4], %rcx\n\tadc\t@acc[5], %rax\n\tadc\t\\$0, %rdx\n\n\tmov\t%rdx, @acc[5]\n\tneg\t%rdx\n\tor\t%rdx, @acc[5]\t\t# excess bit or sign as mask\n\tsar\t\\$63, %rdx\t\t# excess bit as mask\n\n\tmov\t@acc[5], @acc[0]\t# mask |modulus|\n\tmov\t@acc[5], @acc[1]\n\tmov\t@acc[5], @acc[2]\n\tand\t8*0($in_ptr), @acc[0]\n\tand\t8*1($in_ptr), @acc[1]\n\tmov\t@acc[5], @acc[3]\n\tand\t8*2($in_ptr), @acc[2]\n\tand\t8*3($in_ptr), @acc[3]\n\tmov\t@acc[5], 
@acc[4]\n\tand\t8*4($in_ptr), @acc[4]\n\tand\t8*5($in_ptr), @acc[5]\n\n\txor\t%rdx, @acc[0]\t\t# conditionally negate |modulus|\n\txor\t$in_ptr, $in_ptr\n\txor\t%rdx, @acc[1]\n\tsub\t%rdx, $in_ptr\n\txor\t%rdx, @acc[2]\n\txor\t%rdx, @acc[3]\n\txor\t%rdx, @acc[4]\n\txor\t%rdx, @acc[5]\n\tadd\t$in_ptr, @acc[0]\n\tadc\t\\$0,  @acc[1]\n\tadc\t\\$0,  @acc[2]\n\tadc\t\\$0,  @acc[3]\n\tadc\t\\$0,  @acc[4]\n\tadc\t\\$0,  @acc[5]\n\n\tadd\t@acc[0], @acc[6]\t# final adjustment for |modulus|<<384\n\tadc\t@acc[1], @acc[7]\n\tadc\t@acc[2], @acc[8]\n\tadc\t@acc[3], @acc[9]\n\tadc\t@acc[4], %rcx\n\tadc\t@acc[5], %rax\n\n\tmov\t@acc[6], 8*6($out_ptr)\t# store absolute value\n\tmov\t@acc[7], 8*7($out_ptr)\n\tmov\t@acc[8], 8*8($out_ptr)\n\tmov\t@acc[9], 8*9($out_ptr)\n\tmov\t%rcx,    8*10($out_ptr)\n\tmov\t%rax,    8*11($out_ptr)\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tctx_inverse_mod_384,.-ctx_inverse_mod_384\n___\n########################################################################\n# Signed |u|*|f?|+|v|*|g?| subroutines. \"NNN\" in \"NNNx63\" suffix refers\n# to the maximum bit-length of the *result*, and \"63\" - to the maximum\n# bit-length of the |f?| and |g?| single-limb multiplicands. However!\n# The latter should not be taken literally, as they are always chosen so\n# that \"bad things\" don't happen. For example, there comes a point when\n# |v| grows beyond 384 bits, while |u| remains 384 bits wide. Yet, we\n# always call __smul_384x63 to perform |u|*|f0|+|v|*|g0| step. This is\n# because past that point |f0| is always 1 and |g0| is always 0. 
And,\n# since |u| never grows beyond 384 bits, __smul_768x63 doesn't have to\n# perform full-width |u|*|f1| multiplication, half-width one with sign\n# extension is sufficient...\n{\nmy ($out_ptr, $in_ptr, $f0, $g0) = (\"%rdi\", \"%rsi\", \"%rdx\", \"%rcx\");\nmy @acc = map(\"%r$_\",(8..15),\"bx\",\"bp\",\"cx\",\"di\");\nmy $fx = @acc[9];\n\n$code.=<<___;\n.type\t__smulx_768x63,\\@abi-omnipotent\n.align\t32\n__smulx_768x63:\n\tmov\t8*0($in_ptr), @acc[0]\t# load |u|\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\tmov\t8*6($in_ptr), @acc[6]\t# sign limb\n\n\tmov\t$f0, %rax\n\tsar\t\\$63, %rax\t\t# |f0|'s sign as mask\n\txor\t$fx, $fx\t\t# overrides in_ptr\n\tsub\t%rax, $fx\t\t# |f0|'s sign as bit\n\n\tmov\t$out_ptr, 8*1(%rsp)\n\tmov\t$in_ptr,  8*2(%rsp)\n\tlea\t8*7($in_ptr), $in_ptr\t# pointer to |v|\n\n\txor\t%rax, $f0\t\t# conditionally negate |f0|\n\tadd\t$fx, $f0\n\n\txor\t%rax, @acc[0]\t\t# conditionally negate |u|\n\txor\t%rax, @acc[1]\n\txor\t%rax, @acc[2]\n\txor\t%rax, @acc[3]\n\txor\t%rax, @acc[4]\n\txor\t%rax, @acc[5]\n\txor\t%rax, @acc[6]\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\tand\t$f0, @acc[6]\n\tneg\t@acc[6]\n\n\tmulx\t@acc[0], @acc[0], $fx\t# |u|*|f0|\n\tmulx\t@acc[1], @acc[1], %rax\n\tadd\t$fx, @acc[1]\n___\nfor(my ($a,$b) = ($fx, \"%rax\"), $i=2; $i<=5; $i++) {\n$code.=<<___;\n\tmulx\t@acc[$i], @acc[$i], $a\n\tadc\t$b, @acc[$i]\n___\n    ($a, $b) = ($b, $a);\n}\n$code.=<<___;\n\tadc\t%rax, @acc[6]\n\n\tmov\t@acc[0], 8*0($out_ptr)\t# offload |u|*|f0|\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tsar\t\\$63, @acc[6]\t\t# sign extension\n\tmov\t@acc[6], 8*7($out_ptr)\n___\n{\nmy 
$fx=$in_ptr;\n$code.=<<___;\n\tmov\t$g0, $f0\t\t# load |g0|\n\tmov\t$g0, %rax\n\n\tmov\t8*0($in_ptr), @acc[0]\t# load |v|\n\tmov\t8*1($in_ptr), @acc[1]\n\tmov\t8*2($in_ptr), @acc[2]\n\tmov\t8*3($in_ptr), @acc[3]\n\tmov\t8*4($in_ptr), @acc[4]\n\tmov\t8*5($in_ptr), @acc[5]\n\tmov\t8*6($in_ptr), @acc[6]\n\tmov\t8*7($in_ptr), @acc[7]\n\tmov\t8*8($in_ptr), @acc[8]\n\tmov\t8*9($in_ptr), @acc[9]\n\tmov\t8*10($in_ptr), @acc[10]\n\tmov\t8*11($in_ptr), @acc[11]\n\n\tsar\t\\$63, %rax\t\t# |g0|'s sign as mask\n\txor\t$fx, $fx\t\t# overrides in_ptr\n\tsub\t%rax, $fx\t\t# |g0|'s sign as bit\n\n\txor\t%rax, $f0\t\t# conditionally negate |g0|\n\tadd\t$fx, $f0\n\n\txor\t%rax, @acc[0]\t\t# conditionally negate |v|\n\txor\t%rax, @acc[1]\n\txor\t%rax, @acc[2]\n\txor\t%rax, @acc[3]\n\txor\t%rax, @acc[4]\n\txor\t%rax, @acc[5]\n\txor\t%rax, @acc[6]\n\txor\t%rax, @acc[7]\n\txor\t%rax, @acc[8]\n\txor\t%rax, @acc[9]\n\txor\t%rax, @acc[10]\n\txor\t@acc[11], %rax\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\tadc\t\\$0, @acc[7]\n\tadc\t\\$0, @acc[8]\n\tadc\t\\$0, @acc[9]\n\tadc\t\\$0, @acc[10]\n\tadc\t\\$0, %rax\n\n\tmulx\t@acc[0], @acc[0], $fx\t# |v|*|g0|\n\tmulx\t@acc[1], @acc[1], @acc[11]\n\tadd\t$fx, @acc[1]\n___\nfor(my ($a,$b) = ($fx, @acc[11]), $i=2; $i<11; $i++) {\n$code.=<<___;\n\tmulx\t@acc[$i], @acc[$i], $a\n\tadc\t$b, @acc[$i]\n___\n    ($a, $b) = ($b, $a);\n}\n$code.=<<___;\n\tmov\t8*1(%rsp), $out_ptr\t# restore original out_ptr\n\tadc\t\\$0, $fx\n\timulq\t%rdx\n\tadd\t$fx, %rax\n\tadc\t\\$0, %rdx\t\t# used in the final step\n\n\tadd\t8*0($out_ptr), @acc[0]\t# accumulate |u|*|f0|\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t8*5($out_ptr), @acc[5]\n\tadc\t8*6($out_ptr), @acc[6]\n\tmov\t8*7($out_ptr), $fx\t# sign extension\n\tadc\t$fx, @acc[7]\n\tadc\t$fx, @acc[8]\n\tadc\t$fx, 
@acc[9]\n\tadc\t$fx, @acc[10]\n\tadc\t$fx, %rax\n\tadc\t$fx, %rdx\n\n\tmov\t8*2(%rsp), $in_ptr\t# restore original in_ptr\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\tmov\t@acc[7], 8*7($out_ptr)\n\tmov\t@acc[8], 8*8($out_ptr)\n\tmov\t@acc[9], 8*9($out_ptr)\n\tmov\t@acc[10], 8*10($out_ptr)\n\tmov\t%rax,     8*11($out_ptr)\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulx_768x63,.-__smulx_768x63\n___\n}\n$code.=<<___;\n.type\t__smulx_384x63,\\@abi-omnipotent\n.align\t32\n__smulx_384x63:\n___\nfor($j=0; $j<2; $j++) {\nmy $k = 8*7*$j;\n$code.=<<___;\n\tmov\t$k+8*0($in_ptr), @acc[0] # load |u| (or |v|)\n\tmov\t$k+8*1($in_ptr), @acc[1]\n\tmov\t$k+8*2($in_ptr), @acc[2]\n\tmov\t$k+8*3($in_ptr), @acc[3]\n\tmov\t$k+8*4($in_ptr), @acc[4]\n\tmov\t$k+8*5($in_ptr), @acc[5]\n\tmov\t$k+8*6($in_ptr), @acc[6] # sign/excess limb\n\n\tmov\t$f0, $fx\n\tsar\t\\$63, $fx\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t%rax, %rax\n\tsub\t$fx, %rax\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t$fx, $f0\t\t# conditionally negate |f0|\n\tadd\t%rax, $f0\n\n\txor\t$fx, @acc[0]\t\t# conditionally negate |u| (or |v|)\n\txor\t$fx, @acc[1]\n\txor\t$fx, @acc[2]\n\txor\t$fx, @acc[3]\n\txor\t$fx, @acc[4]\n\txor\t$fx, @acc[5]\n\txor\t$fx, @acc[6]\n\tadd\t%rax, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\tand\t$f0, @acc[6]\n\tneg\t@acc[6]\n\n\tmulx\t@acc[0], @acc[0], $fx\t# |u|*|f0| (or |v|*|g0|)\n\tmulx\t@acc[1], @acc[1], %rax\n\tadd\t$fx, @acc[1]\n___\nfor(my ($a,$b) = ($fx, \"%rax\"), $i=2; $i<5; $i++) {\n$code.=<<___;\n\tmulx\t@acc[$i], @acc[$i], $a\n\tadc\t$b, @acc[$i]\n___\n    ($a, $b) = ($b, $a);\n}\n$code.=<<___\tif ($j==0);\n\tmulx\t@acc[$i], @acc[$i], %rax\n\tmov\t$g0, $f0\n\tadc\t$fx, @acc[$i]\n\tadc\t%rax, 
@acc[6]\n\n\tmov\t@acc[0], 8*0($out_ptr)\t# offload |u|*|f0|\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], @acc[7]\n\tmov\t@acc[6], @acc[8]\n___\n}\n$code.=<<___;\n\tmulx\t@acc[$i], @acc[$i], %rax\n\tadc\t$fx, @acc[$i]\n\tadc\t%rax, @acc[6]\n\n\tadd\t8*0($out_ptr), @acc[0]\t# accumulate |u|*|f0|\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), @acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t@acc[7],       @acc[5]\n\tadc\t@acc[8],       @acc[6]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], 8*6($out_ptr)\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulx_384x63,.-__smulx_384x63\n___\n########################################################################\n# Signed abs(|a|*|f?|+|b|*|g?|)>>k subroutines. \"NNN\" in the middle of\n# the names refers to maximum bit-lengths of |a| and |b|. As already\n# mentioned, |f?| and |g?| can be viewed as 63 bits wide, but are always\n# chosen so that \"bad things\" don't happen. 
For example, so that the\n# sum of the products doesn't overflow, and that the final result is\n# never wider than inputs...\n{\n$code.=<<___;\n.type\t__smulx_384_n_shift_by_31,\\@abi-omnipotent\n.align\t32\n__smulx_384_n_shift_by_31:\n\tmov\t$f0, @acc[8]\t\t# make backup copy\n___\nmy $f0 = @acc[8];\nfor($j=0; $j<2; $j++) {\nmy $k = 8*6*$j;\n$code.=<<___;\n\tmov\t$k+8*0($in_ptr), @acc[0] # load |a| (or |b|)\n\tmov\t$k+8*1($in_ptr), @acc[1]\n\tmov\t$k+8*2($in_ptr), @acc[2]\n\tmov\t$k+8*3($in_ptr), @acc[3]\n\tmov\t$k+8*4($in_ptr), @acc[4]\n\tmov\t$k+8*5($in_ptr), @acc[5]\n\n\tmov\t%rdx, %rax\n\tsar\t\\$63, %rax\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t$fx, $fx\n\tsub\t%rax, $fx\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t%rax, %rdx\t\t# conditionally negate |f0| (or |g0|)\n\tadd\t$fx, %rdx\n\n\txor\t%rax, @acc[0]\t\t# conditionally negate |a| (or |b|)\n\txor\t%rax, @acc[1]\n\txor\t%rax, @acc[2]\n\txor\t%rax, @acc[3]\n\txor\t%rax, @acc[4]\n\txor\t%rax, @acc[5]\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tand\t%rdx, %rax\n\tneg\t%rax\n\n\tmulx\t@acc[0], @acc[0], $fx\t# |a|*|f0| (or |b|*|g0|)\n\tmulx\t@acc[1], @acc[1], @acc[6]\n\tadd\t$fx, @acc[1]\n___\nfor(my ($a,$b) = ($fx, @acc[6]), $i=2; $i<5; $i++) {\n$code.=<<___;\n\tmulx\t@acc[$i], @acc[$i], $a\n\tadc\t$b, @acc[$i]\n___\n    ($a, $b) = ($b, $a);\n}\n$code.=<<___\tif ($j==0);\n\tmulx\t@acc[5], @acc[5], @acc[6]\n\tadc\t$fx, @acc[5]\n\tadc\t%rax, @acc[6]\n\n\tmov\t$g0, %rdx\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\tmov\t@acc[6], @acc[7]\n___\n}\n$code.=<<___;\n\tmulx\t@acc[5], @acc[5], @acc[6]\n\tadc\t$fx, @acc[5]\n\tadc\t%rax, @acc[6]\n\n\tadd\t8*0($out_ptr), @acc[0]\n\tadc\t8*1($out_ptr), @acc[1]\n\tadc\t8*2($out_ptr), @acc[2]\n\tadc\t8*3($out_ptr), 
@acc[3]\n\tadc\t8*4($out_ptr), @acc[4]\n\tadc\t8*5($out_ptr), @acc[5]\n\tadc\t@acc[7],       @acc[6]\n\tmov\t$f0, %rdx\t\t# restore the original value\n\n\tshrd\t\\$31, @acc[1], @acc[0]\n\tshrd\t\\$31, @acc[2], @acc[1]\n\tshrd\t\\$31, @acc[3], @acc[2]\n\tshrd\t\\$31, @acc[4], @acc[3]\n\tshrd\t\\$31, @acc[5], @acc[4]\n\tshrd\t\\$31, @acc[6], @acc[5]\n\n\tsar\t\\$63, @acc[6]\t\t# sign as mask\n\txor\t$fx, $fx\n\tsub\t@acc[6], $fx\t\t# sign as bit\n\n\txor\t@acc[6], @acc[0]\t# conditionally negate the result\n\txor\t@acc[6], @acc[1]\n\txor\t@acc[6], @acc[2]\n\txor\t@acc[6], @acc[3]\n\txor\t@acc[6], @acc[4]\n\txor\t@acc[6], @acc[5]\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, @acc[2]\n\tadc\t\\$0, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmov\t@acc[0], 8*0($out_ptr)\n\tmov\t@acc[1], 8*1($out_ptr)\n\tmov\t@acc[2], 8*2($out_ptr)\n\tmov\t@acc[3], 8*3($out_ptr)\n\tmov\t@acc[4], 8*4($out_ptr)\n\tmov\t@acc[5], 8*5($out_ptr)\n\n\txor\t@acc[6], %rdx\t\t# conditionally negate |f0|\n\txor\t@acc[6], $g0\t\t# conditionally negate |g0|\n\tadd\t$fx, %rdx\n\tadd\t$fx, $g0\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulx_384_n_shift_by_31,.-__smulx_384_n_shift_by_31\n___\n} {\n$code.=<<___;\n.type\t__smulx_191_n_shift_by_31,\\@abi-omnipotent\n.align\t32\n__smulx_191_n_shift_by_31:\n\tmov\t$f0, @acc[8]\n___\nmy $f0 = @acc[8];\nfor($j=0; $j<2; $j++) {\nmy $k = 8*6*$j;\nmy @acc=@acc;\n   @acc=@acc[3..5] if ($j);\n$code.=<<___;\n\tmov\t$k+8*0($in_ptr), @acc[0] # load |a| (or |b|)\n\tmov\t$k+8*1($in_ptr), @acc[1]\n\tmov\t$k+8*2($in_ptr), @acc[2]\n\n\tmov\t%rdx, %rax\n\tsar\t\\$63, %rax\t\t# |f0|'s sign as mask (or |g0|'s)\n\txor\t$fx, $fx\n\tsub\t%rax, $fx\t\t# |f0|'s sign as bit (or |g0|'s)\n\n\txor\t%rax, %rdx\t\t# conditionally negate |f0| (or |g0|)\n\tadd\t$fx, %rdx\n\n\txor\t%rax, @acc[0]\t\t# conditionally negate |a| (or |b|)\n\txor\t%rax, @acc[1]\n\txor\t@acc[2], %rax\n\tadd\t$fx, @acc[0]\n\tadc\t\\$0, @acc[1]\n\tadc\t\\$0, 
%rax\n\n\tmulx\t@acc[0], @acc[0], $fx\t# |a|*|f0| (or |b|*|g0|)\n\tmulx\t@acc[1], @acc[1], @acc[2]\n\tadd\t$fx, @acc[1]\n\tadc\t\\$0, @acc[2]\n\timulq\t%rdx\n\tadd\t%rax, @acc[2]\n\tadc\t\\$0, %rdx\n___\n$code.=<<___\tif ($j==0);\n\tmov\t%rdx, @acc[6]\n\tmov\t$g0, %rdx\n___\n}\n$code.=<<___;\n\tadd\t@acc[0], @acc[3]\n\tadc\t@acc[1], @acc[4]\n\tadc\t@acc[2], @acc[5]\n\tadc\t%rdx,    @acc[6]\n\tmov\t$f0, %rdx\n\n\tshrd\t\\$31, @acc[4], @acc[3]\n\tshrd\t\\$31, @acc[5], @acc[4]\n\tshrd\t\\$31, @acc[6], @acc[5]\n\n\tsar\t\\$63, @acc[6]\t\t# sign as mask\n\txor\t$fx, $fx\n\tsub\t@acc[6], $fx\t\t# sign as bit\n\n\txor\t@acc[6], @acc[3]\t# conditionally negate the result\n\txor\t@acc[6], @acc[4]\n\txor\t@acc[6], @acc[5]\n\tadd\t$fx, @acc[3]\n\tadc\t\\$0, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\tmov\t@acc[3], 8*0($out_ptr)\n\tmov\t@acc[4], 8*1($out_ptr)\n\tmov\t@acc[5], 8*2($out_ptr)\n\n\txor\t@acc[6], %rdx\t\t# conditionally negate |f0|\n\txor\t@acc[6], $g0\t\t# conditionally negate |g0|\n\tadd\t$fx, %rdx\n\tadd\t$fx, $g0\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=@acc[0]\n.size\t__smulx_191_n_shift_by_31,.-__smulx_191_n_shift_by_31\n___\n} }\n\n{\nmy ($a_lo, $a_hi, $b_lo, $b_hi) = map(\"%r$_\",(8..11));\nmy ($t0, $t1, $t2, $t3, $t4) = (\"%rax\",\"%rbx\",\"%rbp\",\"%r14\",\"%r15\");\nmy ($fg0, $fg1, $bias) = ($g0, $g1, $t4);\nmy ($a_, $b_) = ($a_lo, $b_lo);\n{\nmy @a = ($a_lo, $t1, $a_hi);\nmy @b = ($b_lo, $t2, $b_hi);\n\n$code.=<<___;\n.type\t__ab_approximation_31,\\@abi-omnipotent\n.align\t32\n__ab_approximation_31:\n\tmov\t8*5($in_ptr), @a[2]\t# load |a| in reverse order\n\tmov\t8*11($in_ptr), @b[2]\t# load |b| in reverse order\n\tmov\t8*4($in_ptr), @a[1]\n\tmov\t8*10($in_ptr), @b[1]\n\tmov\t8*3($in_ptr), @a[0]\n\tmov\t8*9($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# check top-most limbs, ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tmov\t8*2($in_ptr), @a[0]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*8($in_ptr), @b[0]\n\n\tmov\t@a[2], 
$t0\n\tor\t@b[2], $t0\t\t# ... ones before top-most, ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tmov\t8*1($in_ptr), @a[0]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*7($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# ... and ones before that ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tmov\t8*0($in_ptr), @a[0]\n\tcmovz\t@b[0], @b[1]\n\tmov\t8*6($in_ptr), @b[0]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\t\t# ... and ones before that ...\n\tcmovz\t@a[1], @a[2]\n\tcmovz\t@b[1], @b[2]\n\tcmovz\t@a[0], @a[1]\n\tcmovz\t@b[0], @b[1]\n\n\tmov\t@a[2], $t0\n\tor\t@b[2], $t0\n\tbsr\t$t0, %rcx\n\tlea\t1(%rcx), %rcx\n\tcmovz\t@a[0], @a[2]\n\tcmovz\t@b[0], @b[2]\n\tcmovz\t$t0, %rcx\n\tneg\t%rcx\n\t#and\t\\$63, %rcx\t\t# debugging artefact\n\n\tshldq\t%cl, @a[1], @a[2]\t# align second limb to the left\n\tshldq\t%cl, @b[1], @b[2]\n\n\tmov\t\\$0x7FFFFFFF, %eax\n\tand\t%rax, @a[0]\n\tand\t%rax, @b[0]\n\tandn\t@a[2], %rax, @a[2]\n\tandn\t@b[2], %rax, @b[2]\n\tor\t@a[2], @a[0]\n\tor\t@b[2], @b[0]\n\n\tjmp\t__inner_loop_31\n\n\tret\n.size\t__ab_approximation_31,.-__ab_approximation_31\n___\n}\n$code.=<<___;\n.type\t__inner_loop_31,\\@abi-omnipotent\n.align\t32\n__inner_loop_31:\t\t################# by Thomas Pornin\n\tmov\t\\$0x7FFFFFFF80000000, $fg0\t# |f0|=1, |g0|=0\n\tmov\t\\$0x800000007FFFFFFF, $fg1\t# |f1|=0, |g1|=1\n\tmov\t\\$0x7FFFFFFF7FFFFFFF, $bias\n\n.Loop_31:\n\tcmp\t$b_, $a_\t\t# if |a_|<|b_|, swap the variables\n\tmov\t$a_, $t0\n\tmov\t$b_, $t1\n\tmov\t$fg0, $t2\n\tmov\t$fg1, $t3\n\tcmovb\t$b_, $a_\n\tcmovb\t$t0, $b_\n\tcmovb\t$fg1, $fg0\n\tcmovb\t$t2, $fg1\n\n\tsub\t$b_, $a_\t\t# |a_|-|b_|\n\tsub\t$fg1, $fg0\t\t# |f0|-|f1|, |g0|-|g1|\n\tadd\t$bias, $fg0\n\n\ttest\t\\$1, $t0\t\t# if |a_| was even, roll back \n\tcmovz\t$t0, $a_\n\tcmovz\t$t1, $b_\n\tcmovz\t$t2, $fg0\n\tcmovz\t$t3, $fg1\n\n\tshr\t\\$1, $a_\t\t# |a_|>>=1\n\tadd\t$fg1, $fg1\t\t# |f1|<<=1, |g1|<<=1\n\tsub\t$bias, $fg1\n\tsub\t\\$1, 
$cnt\n\tjnz\t.Loop_31\n\n\tshr\t\\$32, $bias\n\tmov\t%ecx, %edx\t\t# $fg0, $f0\n\tmov\t${fg1}d, ${f1}d\n\tshr\t\\$32, $g0\n\tshr\t\\$32, $g1\n\tsub\t$bias, $f0\t\t# remove the bias\n\tsub\t$bias, $g0\n\tsub\t$bias, $f1\n\tsub\t$bias, $g1\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=$a_lo\n.size\t__inner_loop_31,.-__inner_loop_31\n\n.type\t__tail_loop_55,\\@abi-omnipotent\n.align\t32\n__tail_loop_55:\n\tmov\t\\$1, $f0\t# |f0|=1\n\txor\t$g0, $g0\t# |g0|=0\n\txor\t$f1, $f1\t# |f1|=0\n\tmov\t\\$1, $g1\t# |g1|=1\n\n.Loop_55:\n\txor\t$t0, $t0\n\ttest\t\\$1, $a_lo\t# if |a_| is odd, then we'll be subtracting |b_|\n\tmov\t$b_lo, $t1\n\tcmovnz\t$b_lo, $t0\n\tsub\t$a_lo, $t1\t# |b_|-|a_|\n\tmov\t$a_lo, $t2\n\tsub\t$t0, $a_lo\t# |a_|-|b_| (or |a_|-0 if |a_| was even)\n\tcmovc\t$t1, $a_lo\t# borrow means |a_|<|b_|, replace with |b_|-|a_|\n\tcmovc\t$t2, $b_lo\t# |b_| = |a_|\n\tmov\t$f0, $t0\t# exchange |f0| and |f1|\n\tcmovc\t$f1, $f0\n\tcmovc\t$t0, $f1\n\tmov\t$g0, $t1\t# exchange |g0| and |g1|\n\tcmovc\t$g1, $g0\n\tcmovc\t$t1, $g1\n\txor\t$t0, $t0\n\txor\t$t1, $t1\n\tshr\t\\$1, $a_lo\n\ttest\t\\$1, $t2\t# if |a_| was odd, then we'll be subtracting...\n\tcmovnz\t$f1, $t0\n\tcmovnz\t$g1, $t1\n\tadd\t$f1, $f1\t# |f1|<<=1\n\tadd\t$g1, $g1\t# |g1|<<=1\n\tsub\t$t0, $f0\t# |f0|-=|f1| (or |f0-=0| if |a_| was even)\n\tsub\t$t1, $g0\t# |g0|-=|g1| (or |g0-=0| ...)\n\tsub\t\\$1, $cnt\n\tjnz\t.Loop_55\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=$a_lo\n.size\t__tail_loop_55,.-__tail_loop_55\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/div3w-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n$code.=<<___;\n.text\n\n.globl\tdiv_3_limbs\n.hidden\tdiv_3_limbs\n.type\tdiv_3_limbs,%function\n.align\t5\ndiv_3_limbs:\n\thint\t#34\n\tldp\tx4,x5,[x0]\t// load R\n\teor\tx0,x0,x0\t// Q = 0\n\tmov\tx3,#64\t\t// loop counter\n\tnop\n\n.Loop:\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsbcs\tx7,x5,x2\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tcsel\tx4,x4,x6,lo\t// select between R and R - D\n\t extr\tx1,x2,x1,#1\t// D >>= 1\n\tcsel\tx5,x5,x7,lo\n\t lsr\tx2,x2,#1\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\tsub\tx3,x3,#1\n\tcbnz\tx3,.Loop\n\n\tasr\tx3,x0,#63\t// top bit -> mask\n\tadd\tx0,x0,x0\t// Q <<= 1\n\tsubs\tx6,x4,x1\t// R - D\n\tadd\tx0,x0,#1\t// Q + speculative bit\n\tsbcs\tx7,x5,x2\n\tsbc\tx0,x0,xzr\t// subtract speculative bit\n\n\torr\tx0,x0,x3\t// all ones if overflow\n\n\tret\n.size\tdiv_3_limbs,.-div_3_limbs\n___\n{\nmy ($div_rem, $divisor, $quot) = map(\"x$_\",(0..2));\nmy @div = map(\"x$_\",(3..4));\nmy @acc = map(\"x$_\",(5..7));\nmy @t = map(\"x$_\",(8..11));\n\n$code.=<<___;\n.globl\tquot_rem_128\n.hidden\tquot_rem_128\n.type\tquot_rem_128,%function\n.align\t5\nquot_rem_128:\n\thint\t#34\n\tldp\t@div[0],@div[1],[$divisor]\n\n\tmul\t@acc[0],@div[0],$quot\t// divisor[0:1} * quotient\n\tumulh\t@acc[1],@div[0],$quot\n\tmul\t@t[3],  @div[1],$quot\n\tumulh\t@acc[2],@div[1],$quot\n\n\tldp\t@t[0],@t[1],[$div_rem]\t// load 3 limbs of the 
dividend\n\tldr\t@t[2],[$div_rem,#16]\n\n\tadds\t@acc[1],@acc[1],@t[3]\n\tadc\t@acc[2],@acc[2],xzr\n\n\tsubs\t@t[0],@t[0],@acc[0]\t// dividend - divisor * quotient\n\tsbcs\t@t[1],@t[1],@acc[1]\n\tsbcs\t@t[2],@t[2],@acc[2]\n\tsbc\t@acc[0],xzr,xzr\t\t// borrow -> mask\n\n\tadd\t$quot,$quot,@acc[0]\t// if borrowed, adjust the quotient ...\n\tand\t@div[0],@div[0],@acc[0]\n\tand\t@div[1],@div[1],@acc[0]\n\tadds\t@t[0],@t[0],@div[0]\t// ... and add divisor\n\tadc\t@t[1],@t[1],@div[1]\n\n\tstp\t@t[0],@t[1],[$div_rem]\t// save 2 limbs of the remainder\n\tstr\t$quot,[$div_rem,#16]\t// and one limb of the quotient\n\n\tmov\tx0,$quot\t\t// return adjusted quotient\n\n\tret\n.size\tquot_rem_128,.-quot_rem_128\n\n.globl\tquot_rem_64\n.hidden\tquot_rem_64\n.type\tquot_rem_64,%function\n.align\t5\nquot_rem_64:\n\thint\t#34\n\tldr\t@div[0],[$divisor]\n\tldr\t@t[0],[$div_rem]\t// load 1 limb of the dividend\n\n\tmul\t@acc[0],@div[0],$quot\t// divisor * quotient\n\n\tsub\t@t[0],@t[0],@acc[0]\t// dividend - divisor * quotient\n\n\tstp\t@t[0],$quot,[$div_rem]\t// save remainder and quotient\n\n\tmov\tx0,$quot\t\t// return quotient\n\n\tret\n.size\tquot_rem_64,.-quot_rem_64\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/div3w-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$c_ref=<<'___';\n/*\n * |div_top| points at two most significant limbs of the dividend, |d_hi|\n * and |d_lo| are two most significant limbs of the divisor. If divisor\n * is only one limb, it is to be passed in |d_hi| with zero in |d_lo|.\n * The divisor is required to be \"bitwise left-aligned,\" and dividend's\n * top limbs to be not larger than the divisor's. The latter limitation\n * can be problematic in the first iteration of multi-precision division,\n * where in most general case the condition would have to be \"smaller.\"\n * The subroutine considers four limbs, two of which are \"overlapping,\"\n * hence the name... 
Another way to look at it is to think of the pair\n * of the dividend's limbs being suffixed with a zero:\n *   +-------+-------+-------+\n * R |       |       |   0   |\n *   +-------+-------+-------+\n *           +-------+-------+\n * D         |       |       |\n *           +-------+-------+\n */\nlimb_t div_3_limbs(const limb_t *div_top, limb_t d_lo, limb_t d_hi)\n{\n    llimb_t R = ((llimb_t)div_top[1] << LIMB_BITS) | div_top[0];\n    llimb_t D = ((llimb_t)d_hi << LIMB_BITS) | d_lo;\n    limb_t Q = 0, mask;\n    size_t i;\n\n    for (i = 0; i < LIMB_BITS; i++) {\n        Q <<= 1;\n        mask = (R >= D);\n        Q |= mask;\n        R -= (D & ((llimb_t)0 - mask));\n        D >>= 1;\n    }\n\n    mask = 0 - (Q >> (LIMB_BITS - 1));   /* does it overflow? */\n\n    Q <<= 1;\n    Q |= (R >= D);\n\n    return (Q | mask);\n}\n___\n\n$code.=<<___;\n.text\n\n.globl\tdiv_3_limbs\n.hidden\tdiv_3_limbs\n.type\tdiv_3_limbs,\\@function,3,\"unwind\"\n.align\t32\ndiv_3_limbs:\n.cfi_startproc\n.cfi_end_prologue\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t(%rdi),%r8\t\t# load R.lo\n\tmov\t8(%rdi),%r9\t\t# load R.hi\n\txor\t%rax,%rax\t\t# Q = 0\n\tmov\t\\$64,%ecx\t\t# loop counter\n\n.Loop:\n\t mov\t%r8,%r10\t\t# put aside R\n\tsub\t%rsi,%r8\t\t# R -= D\n\t mov\t%r9,%r11\n\tsbb\t%rdx,%r9\n\tlea\t1(%rax,%rax),%rax\t# Q <<= 1 + speculative bit\n\t mov\t%rdx,%rdi\n\tcmovc\t%r10,%r8\t\t# restore R if R - D borrowed\n\tcmovc\t%r11,%r9\n\tsbb\t\\$0,%rax\t\t# subtract speculative bit\n\t shl\t\\$63,%rdi\n\t shr\t\\$1,%rsi\n\t shr\t\\$1,%rdx\n\t or\t%rdi,%rsi\t\t# D >>= 1\n\tsub\t\\$1,%ecx\n\tjnz\t.Loop\n\n\tlea\t1(%rax,%rax),%rcx\t# Q <<= 1 + speculative bit\n\tsar\t\\$63,%rax\t\t# top bit -> mask\n\n\tsub\t%rsi,%r8\t\t# R -= D\n\tsbb\t%rdx,%r9\n\tsbb\t\\$0,%rcx\t\t# subtract speculative bit\n\n\tor\t%rcx,%rax\t\t# all ones if 
overflow\n\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tdiv_3_limbs,.-div_3_limbs\n___\n########################################################################\n# Calculate remainder and adjust the quotient, which can be off-by-one.\n# Then save quotient in limb next to top limb of the remainder. There is\n# place, because the remainder/next-iteration-dividend gets shorter by\n# one limb.\n{\nmy ($div_rem, $divisor, $quotient) = (\"%rdi\", \"%rsi\", \"%rcx\");\nmy @acc = (\"%r8\", \"%r9\", \"%rdx\");\nmy @tmp = (\"%r10\", \"%r11\", \"%rax\");\n\n$code.=<<___;\n.globl\tquot_rem_128\n.hidden\tquot_rem_128\n.type\tquot_rem_128,\\@function,3,\"unwind\"\n.align\t32\nquot_rem_128:\n.cfi_startproc\n.cfi_end_prologue\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t%rdx, %rax\n\tmov\t%rdx, $quotient\n\n\tmulq\t0($divisor)\t\t# divisor[0:1] * quotient\n\tmov\t%rax, @acc[0]\n\tmov\t$quotient, %rax\n\tmov\t%rdx, @acc[1]\n\n\tmulq\t8($divisor)\n\tadd\t%rax, @acc[1]\n\tadc\t\\$0, %rdx\t\t# %rdx is @acc[2]\n\n\tmov\t0($div_rem), @tmp[0]\t# load 3 limbs of the dividend\n\tmov\t8($div_rem), @tmp[1]\n\tmov\t16($div_rem), @tmp[2]\n\n\tsub\t@acc[0], @tmp[0]\t# dividend - divisor * quotient\n\tsbb\t@acc[1], @tmp[1]\n\tsbb\t@acc[2], @tmp[2]\n\tsbb\t@acc[0], @acc[0]\t# borrow -> mask\n\n\tadd\t@acc[0], $quotient\t# if borrowed, adjust the quotient ...\n\tmov\t@acc[0], @acc[1]\n\tand\t0($divisor), @acc[0]\n\tand\t8($divisor), @acc[1]\n\tadd\t@acc[0], @tmp[0]\t# ... and add divisor\n\tadc\t@acc[1], @tmp[1]\n\n\tmov\t@tmp[0], 0($div_rem)\t# save 2 limbs of the remainder ...\n\tmov\t@tmp[1], 8($div_rem)\n\tmov\t$quotient, 16($div_rem)\t# ... and 1 limb of the quotient\n\n\tmov\t$quotient, %rax\t\t# return adjusted quotient\n\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tquot_rem_128,.-quot_rem_128\n\n########################################################################\n# Unlike 128-bit case above, quotient is exact. 
As a result, just one limb\n# of the dividend is sufficient to calculate the remainder...\n\n.globl\tquot_rem_64\n.hidden\tquot_rem_64\n.type\tquot_rem_64,\\@function,3,\"unwind\"\n.align\t32\nquot_rem_64:\n.cfi_startproc\n.cfi_end_prologue\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t%rdx, %rax\t\t# return quotient\n\timulq\t0($divisor), %rdx\t# divisor[0] * quotient\n\n\tmov\t0($div_rem), @tmp[0]\t# load 1 limb of the dividend\n\n\tsub\t%rdx, @tmp[0]\t\t# dividend - divisor * quotient\n\n\tmov\t@tmp[0], 0($div_rem)\t# save 1 limb of the remainder ...\n\tmov\t%rax, 8($div_rem)\t# ... and 1 limb of the quotient\n\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tquot_rem_64,.-quot_rem_64\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mul_mont_256-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# As for \"sparse\" in subroutine names, see commentary in the\n# asm/mulx_mont_256-x86_64.pl module.\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n($r_ptr,$a_ptr,$b_ptr,$n_ptr,$n0) = map(\"x$_\", 0..4);\n\n@mod=map(\"x$_\",(5..8));\n$bi=\"x9\";\n@a=map(\"x$_\",(10..13));\n@tmp=map(\"x$_\",(14..17));\n@acc=map(\"x$_\",(19..24));\n$m0=$n_ptr;\n\n$code.=<<___;\n.text\n\n.globl\tmul_mont_sparse_256\n.hidden\tmul_mont_sparse_256\n.type\tmul_mont_sparse_256,%function\n.align\t5\nmul_mont_sparse_256:\n\thint\t#34\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldr\t$bi,        [$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\n\tmul\t@acc[0],@a[0],$bi\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tmul\t@acc[1],@a[1],$bi\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tmul\t@acc[2],@a[2],$bi\n\tmul\t@acc[3],@a[3],$bi\n\n\t umulh\t@tmp[0],@a[0],$bi\n\t umulh\t@tmp[1],@a[1],$bi\n\tmul\t$m0,$n0,@acc[0]\n\t umulh\t@tmp[2],@a[2],$bi\n\t umulh\t@tmp[3],@a[3],$bi\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\t//mul\t@tmp[0],@mod[0],$m0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$m0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$m0\n\t adc\t@acc[4],xzr,    @tmp[3]\n\tmul\t@tmp[3],@mod[3],$m0\n___\nfor ($i=1;$i<4;$i++) 
{\n$code.=<<___;\n\tldr\t$bi,[$b_ptr,8*$i]\n\tsubs\txzr,@acc[0],#1\t\t//adds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$m0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$m0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$m0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$m0\n\tadc\t@acc[4],@acc[4],xzr\n\n\t adds\t@acc[0],@acc[1],@tmp[0]\n\tmul\t@tmp[0],@a[0],$bi\n\t adcs\t@acc[1],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@a[1],$bi\n\t adcs\t@acc[2],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@a[2],$bi\n\t adcs\t@acc[3],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@a[3],$bi\n\t adc\t@acc[4],xzr,xzr\n\n\tadds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@a[0],$bi\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@a[1],$bi\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\tmul\t$m0,$n0,@acc[0]\n\t umulh\t@tmp[2],@a[2],$bi\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@a[3],$bi\n\tadc\t@acc[4],@acc[4],xzr\n\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\t//mul\t@tmp[0],@mod[0],$m0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$m0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$m0\n\t adc\t@acc[4],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@mod[3],$m0\n___\n}\n$code.=<<___;\n\tsubs\txzr,@acc[0],#1\t\t//adds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$m0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$m0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$m0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$m0\n\tadc\t@acc[4],@acc[4],xzr\n\n\t adds\t@acc[0],@acc[1],@tmp[0]\n\t adcs\t@acc[1],@acc[2],@tmp[1]\n\t adcs\t@acc[2],@acc[3],@tmp[2]\n\t adcs\t@acc[3],@acc[4],@tmp[3]\n\t adc\t@acc[4],xzr,xzr\n\n\tsubs\t@tmp[0],@acc[0],@mod[0]\n\tsbcs\t@tmp[1],@acc[1],@mod[1]\n\tsbcs\t@tmp[2],@acc[2],@mod[2]\n\tsbcs\t@tmp[3],@acc[3],@mod[3]\n\tsbcs\txzr,    
@acc[4],xzr\n\n\tcsel\t@acc[0],@acc[0],@tmp[0],lo\n\tcsel\t@acc[1],@acc[1],@tmp[1],lo\n\tcsel\t@acc[2],@acc[2],@tmp[2],lo\n\tcsel\t@acc[3],@acc[3],@tmp[3],lo\n\n\tstp\t@acc[0],@acc[1],[$r_ptr]\n\tstp\t@acc[2],@acc[3],[$r_ptr,#16]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\tret\n.size\tmul_mont_sparse_256,.-mul_mont_sparse_256\n___\n{\nmy @acc = (@a,@acc[0..3]);\nmy @a = @mod;\n\n$code.=<<___;\n.globl\tsqr_mont_sparse_256\n.hidden\tsqr_mont_sparse_256\n.type\tsqr_mont_sparse_256,%function\n.align\t5\nsqr_mont_sparse_256:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-6*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tmov\t$n0,$n_ptr\n\n\t////////////////////////////////////////////////////////////////\n\t//  |  |  |  |  |  |a1*a0|  |\n\t//  |  |  |  |  |a2*a0|  |  |\n\t//  |  |a3*a2|a3*a0|  |  |  |\n\t//  |  |  |  |a2*a1|  |  |  |\n\t//  |  |  |a3*a1|  |  |  |  |\n\t// *|  |  |  |  |  |  |  | 2|\n\t// +|a3*a3|a2*a2|a1*a1|a0*a0|\n\t//  |--+--+--+--+--+--+--+--|\n\t//  |A7|A6|A5|A4|A3|A2|A1|A0|, where Ax is @acc[x]\n\t//\n\t//  \"can't overflow\" below mark carrying into high part of\n\t//  multiplication result, which can't overflow, because it\n\t//  can never be all ones.\n\n\tmul\t@acc[1],@a[1],@a[0]\t// a[1]*a[0]\n\tumulh\t@tmp[1],@a[1],@a[0]\n\tmul\t@acc[2],@a[2],@a[0]\t// a[2]*a[0]\n\tumulh\t@tmp[2],@a[2],@a[0]\n\tmul\t@acc[3],@a[3],@a[0]\t// a[3]*a[0]\n\tumulh\t@acc[4],@a[3],@a[0]\n\n\tadds\t@acc[2],@acc[2],@tmp[1]\t// accumulate high parts of multiplication\n\t mul\t@tmp[0],@a[2],@a[1]\t// a[2]*a[1]\n\t umulh\t@tmp[1],@a[2],@a[1]\n\tadcs\t@acc[3],@acc[3],@tmp[2]\n\t mul\t@tmp[2],@a[3],@a[1]\t// a[3]*a[1]\n\t umulh\t@tmp[3],@a[3],@a[1]\n\tadc\t@acc[4],@acc[4],xzr\t// can't 
overflow\n\n\tmul\t@acc[5],@a[3],@a[2]\t// a[3]*a[2]\n\tumulh\t@acc[6],@a[3],@a[2]\n\n\tadds\t@tmp[1],@tmp[1],@tmp[2]\t// accumulate high parts of multiplication\n\t mul\t@acc[0],@a[0],@a[0]\t// a[0]*a[0]\n\tadc\t@tmp[2],@tmp[3],xzr\t// can't overflow\n\n\tadds\t@acc[3],@acc[3],@tmp[0]\t// accumulate low parts of multiplication\n\t umulh\t@a[0],@a[0],@a[0]\n\tadcs\t@acc[4],@acc[4],@tmp[1]\n\t mul\t@tmp[1],@a[1],@a[1]\t// a[1]*a[1]\n\tadcs\t@acc[5],@acc[5],@tmp[2]\n\t umulh\t@a[1],@a[1],@a[1]\n\tadc\t@acc[6],@acc[6],xzr\t// can't overflow\n\n\tadds\t@acc[1],@acc[1],@acc[1]\t// acc[1-6]*=2\n\t mul\t@tmp[2],@a[2],@a[2]\t// a[2]*a[2]\n\tadcs\t@acc[2],@acc[2],@acc[2]\n\t umulh\t@a[2],@a[2],@a[2]\n\tadcs\t@acc[3],@acc[3],@acc[3]\n\t mul\t@tmp[3],@a[3],@a[3]\t// a[3]*a[3]\n\tadcs\t@acc[4],@acc[4],@acc[4]\n\t umulh\t@a[3],@a[3],@a[3]\n\tadcs\t@acc[5],@acc[5],@acc[5]\n\tadcs\t@acc[6],@acc[6],@acc[6]\n\tadc\t@acc[7],xzr,xzr\n\n\tadds\t@acc[1],@acc[1],@a[0]\t// +a[i]*a[i]\n\tadcs\t@acc[2],@acc[2],@tmp[1]\n\tadcs\t@acc[3],@acc[3],@a[1]\n\tadcs\t@acc[4],@acc[4],@tmp[2]\n\tadcs\t@acc[5],@acc[5],@a[2]\n\tadcs\t@acc[6],@acc[6],@tmp[3]\n\tadc\t@acc[7],@acc[7],@a[3]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadds\t@acc[0],@acc[0],@acc[4]\t// accumulate upper half\n\tadcs\t@acc[1],@acc[1],@acc[5]\n\tadcs\t@acc[2],@acc[2],@acc[6]\n\tadcs\t@acc[3],@acc[3],@acc[7]\n\tadc\t@acc[4],xzr,xzr\n\n\tsubs\t@tmp[0],@acc[0],@mod[0]\n\tsbcs\t@tmp[1],@acc[1],@mod[1]\n\tsbcs\t@tmp[2],@acc[2],@mod[2]\n\tsbcs\t@tmp[3],@acc[3],@mod[3]\n\tsbcs\txzr,    
@acc[4],xzr\n\n\tcsel\t@acc[0],@acc[0],@tmp[0],lo\n\tcsel\t@acc[1],@acc[1],@tmp[1],lo\n\tcsel\t@acc[2],@acc[2],@tmp[2],lo\n\tcsel\t@acc[3],@acc[3],@tmp[3],lo\n\n\tstp\t@acc[0],@acc[1],[$r_ptr]\n\tstp\t@acc[2],@acc[3],[$r_ptr,#16]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#6*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_mont_sparse_256,.-sqr_mont_sparse_256\n___\n}\n{\nmy @a = (@a, $bi);\n\n$code.=<<___;\n.globl\tfrom_mont_256\n.hidden\tfrom_mont_256\n.type\tfrom_mont_256,%function\n.align\t5\nfrom_mont_256:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\n\tmov\t$n0,$n_ptr\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tsubs\t@tmp[0],@a[0],@mod[0]\n\tsbcs\t@tmp[1],@a[1],@mod[1]\n\tsbcs\t@tmp[2],@a[2],@mod[2]\n\tsbcs\t@tmp[3],@a[3],@mod[3]\n\n\tcsel\t@a[0],@a[0],@tmp[0],lo\n\tcsel\t@a[1],@a[1],@tmp[1],lo\n\tcsel\t@a[2],@a[2],@tmp[2],lo\n\tcsel\t@a[3],@a[3],@tmp[3],lo\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tldr\tc29,[csp],#2*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tfrom_mont_256,.-from_mont_256\n\n.globl\tredc_mont_256\n.hidden\tredc_mont_256\n.type\tredc_mont_256,%function\n.align\t5\nredc_mont_256:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\n\tmov\t$n0,$n_ptr\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\n\tbl\t__mul_by_1_mont_256\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\t@tmp[0],@tmp[1],[$a_ptr,#32]\n\tldp\t@tmp[2],@tmp[3],[$a_ptr,#48]\n\n\tadds\t@a[0],@a[0],@tmp[0]\n\tadcs\t@a[1],@a[1],@tmp[1]\n\tadcs\t@a[2],@a[2],@tmp[2]\n\tadcs\t@a[3],@a[3],@tmp[3]\n\tadc\t@a[4],xzr,xzr\n\n\tsubs\t@tmp[0],@a[0],@mod[0]\n\tsbcs\t@tmp[1],@a[1],@mod[1]\n\tsbcs\t@tmp[2],@a[2],@mod[2]\n\tsbcs\t@tmp[3],@a[3],@mod[3]\n\tsbcs\txzr,    
@a[4],xzr\n\n\tcsel\t@a[0],@a[0],@tmp[0],lo\n\tcsel\t@a[1],@a[1],@tmp[1],lo\n\tcsel\t@a[2],@a[2],@tmp[2],lo\n\tcsel\t@a[3],@a[3],@tmp[3],lo\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\n\tldr\tc29,[csp],#2*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tredc_mont_256,.-redc_mont_256\n\n.type\t__mul_by_1_mont_256,%function\n.align\t5\n__mul_by_1_mont_256:\n\tmul\t$m0,$n0,@a[0]\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n___\nfor ($i=1;$i<4;$i++) {\n$code.=<<___;\n\t//mul\t@tmp[0],@mod[0],$m0\n\tmul\t@tmp[1],@mod[1],$m0\n\tmul\t@tmp[2],@mod[2],$m0\n\tmul\t@tmp[3],@mod[3],$m0\n\tsubs\txzr,@a[0],#1\t\t//adds\t@a[0],@a[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$m0\n\tadcs\t@a[1],@a[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$m0\n\tadcs\t@a[2],@a[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$m0\n\tadcs\t@a[3],@a[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$m0\n\tadc\t@a[4],xzr,xzr\n\n\t adds\t@a[0],@a[1],@tmp[0]\n\t adcs\t@a[1],@a[2],@tmp[1]\n\t adcs\t@a[2],@a[3],@tmp[2]\n\tmul\t$m0,$n0,@a[0]\n\t adc\t@a[3],@a[4],@tmp[3]\n___\n}\n$code.=<<___;\n\t//mul\t@tmp[0],@mod[0],$m0\n\tmul\t@tmp[1],@mod[1],$m0\n\tmul\t@tmp[2],@mod[2],$m0\n\tmul\t@tmp[3],@mod[3],$m0\n\tsubs\txzr,@a[0],#1\t\t//adds\t@a[0],@a[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$m0\n\tadcs\t@a[1],@a[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$m0\n\tadcs\t@a[2],@a[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$m0\n\tadcs\t@a[3],@a[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$m0\n\tadc\t@a[4],xzr,xzr\n\n\t adds\t@a[0],@a[1],@tmp[0]\n\t adcs\t@a[1],@a[2],@tmp[1]\n\t adcs\t@a[2],@a[3],@tmp[2]\n\t adc\t@a[3],@a[4],@tmp[3]\n\n\tret\n.size\t__mul_by_1_mont_256,.-__mul_by_1_mont_256\n___\n}\n\nprint $code;\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mul_mont_384-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n($r_ptr,$a_ptr,$b_ptr,$n_ptr,$n0) = map(\"x$_\", 0..4);\n\n@mod = map(\"x$_\",(5..10));\n@a   = map(\"x$_\",(11..16));\n$bi  = \"x17\";\n@acc = map(\"x$_\",(19..25));\n@tmp = map(\"x$_\",(26..28,0,1,3));\n\n$code.=<<___;\n.text\n\n.globl\tadd_mod_384x384\n.hidden\tadd_mod_384x384\n.type\tadd_mod_384x384,%function\n.align\t5\nadd_mod_384x384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__add_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tadd_mod_384x384,.-add_mod_384x384\n\n.type\t__add_mod_384x384,%function\n.align\t5\n__add_mod_384x384:\n\tldp\t@a[0],  @a[1],  [$a_ptr]\n\tldp\t@acc[0],@acc[1],[$b_ptr]\n\tldp\t@a[2],  @a[3],  [$a_ptr,#16]\n\tadds\t@a[0],@a[0],@acc[0]\n\tldp\t@acc[2],@acc[3],[$b_ptr,#16]\n\tadcs\t@a[1],@a[1],@acc[1]\n\tldp\t@a[4],  @a[5],  [$a_ptr,#32]\n\tadcs\t@a[2],@a[2],@acc[2]\n\tldp\t@acc[4],@acc[5],[$b_ptr,#32]\n\tadcs\t@a[3],@a[3],@acc[3]\n\t stp\t@a[0],  @a[1],  
[$r_ptr]\n\tadcs\t@a[4],@a[4],@acc[4]\n\t ldp\t@a[0],  @a[1],  [$a_ptr,#48]\n\tadcs\t@a[5],@a[5],@acc[5]\n\n\t ldp\t@acc[0],@acc[1],[$b_ptr,#48]\n\t stp\t@a[2],  @a[3],  [$r_ptr,#16]\n\t ldp\t@a[2],  @a[3],  [$a_ptr,#64]\n\t ldp\t@acc[2],@acc[3],[$b_ptr,#64]\n\n\tadcs\t@a[0],@a[0],@acc[0]\n\t stp\t@a[4],  @a[5],  [$r_ptr,#32]\n\tadcs\t@a[1],@a[1],@acc[1]\n\t ldp\t@a[4],  @a[5],  [$a_ptr,#80]\n\tadcs\t@a[2],@a[2],@acc[2]\n\t ldp\t@acc[4],@acc[5],[$b_ptr,#80]\n\tadcs\t@a[3],@a[3],@acc[3]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tadcs\t@a[5],@a[5],@acc[5]\n\tadc\t$bi,xzr,xzr\n\n\tsubs\t@acc[0],@a[0],@mod[0]\n\tsbcs\t@acc[1],@a[1],@mod[1]\n\tsbcs\t@acc[2],@a[2],@mod[2]\n\tsbcs\t@acc[3],@a[3],@mod[3]\n\tsbcs\t@acc[4],@a[4],@mod[4]\n\tsbcs\t@acc[5],@a[5],@mod[5]\n\tsbcs\txzr,$bi,xzr\n\n\tcsel\t@a[0],@a[0],@acc[0],lo\n\tcsel\t@a[1],@a[1],@acc[1],lo\n\tcsel\t@a[2],@a[2],@acc[2],lo\n\tcsel\t@a[3],@a[3],@acc[3],lo\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tcsel\t@a[4],@a[4],@acc[4],lo\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tcsel\t@a[5],@a[5],@acc[5],lo\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tret\n.size\t__add_mod_384x384,.-__add_mod_384x384\n\n.globl\tsub_mod_384x384\n.hidden\tsub_mod_384x384\n.type\tsub_mod_384x384,%function\n.align\t5\nsub_mod_384x384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-8*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__sub_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#8*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsub_mod_384x384,.-sub_mod_384x384\n\n.type\t__sub_mod_384x384,%function\n.align\t5\n__sub_mod_384x384:\n\tldp\t@a[0],  @a[1],  
[$a_ptr]\n\tldp\t@acc[0],@acc[1],[$b_ptr]\n\tldp\t@a[2],  @a[3],  [$a_ptr,#16]\n\tsubs\t@a[0],@a[0],@acc[0]\n\tldp\t@acc[2],@acc[3],[$b_ptr,#16]\n\tsbcs\t@a[1],@a[1],@acc[1]\n\tldp\t@a[4],  @a[5],  [$a_ptr,#32]\n\tsbcs\t@a[2],@a[2],@acc[2]\n\tldp\t@acc[4],@acc[5],[$b_ptr,#32]\n\tsbcs\t@a[3],@a[3],@acc[3]\n\t stp\t@a[0],  @a[1],  [$r_ptr]\n\tsbcs\t@a[4],@a[4],@acc[4]\n\t ldp\t@a[0],  @a[1],  [$a_ptr,#48]\n\tsbcs\t@a[5],@a[5],@acc[5]\n\n\t ldp\t@acc[0],@acc[1],[$b_ptr,#48]\n\t stp\t@a[2],  @a[3],  [$r_ptr,#16]\n\t ldp\t@a[2],  @a[3],  [$a_ptr,#64]\n\t ldp\t@acc[2],@acc[3],[$b_ptr,#64]\n\n\tsbcs\t@a[0],@a[0],@acc[0]\n\t stp\t@a[4],  @a[5],  [$r_ptr,#32]\n\tsbcs\t@a[1],@a[1],@acc[1]\n\t ldp\t@a[4],  @a[5],  [$a_ptr,#80]\n\tsbcs\t@a[2],@a[2],@acc[2]\n\t ldp\t@acc[4],@acc[5],[$b_ptr,#80]\n\tsbcs\t@a[3],@a[3],@acc[3]\n\tsbcs\t@a[4],@a[4],@acc[4]\n\tsbcs\t@a[5],@a[5],@acc[5]\n\tsbc\t$bi,xzr,xzr\n\n\t and\t@acc[0],@mod[0],$bi\n\t and\t@acc[1],@mod[1],$bi\n\tadds\t@a[0],@a[0],@acc[0]\n\t and\t@acc[2],@mod[2],$bi\n\tadcs\t@a[1],@a[1],@acc[1]\n\t and\t@acc[3],@mod[3],$bi\n\tadcs\t@a[2],@a[2],@acc[2]\n\t and\t@acc[4],@mod[4],$bi\n\tadcs\t@a[3],@a[3],@acc[3]\n\t and\t@acc[5],@mod[5],$bi\n\tadcs\t@a[4],@a[4],@acc[4]\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tadc\t@a[5],@a[5],@acc[5]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tret\n.size\t__sub_mod_384x384,.-__sub_mod_384x384\n\n.type\t__add_mod_384,%function\n.align\t5\n__add_mod_384:\n\tldp\t@a[0],  @a[1],  [$a_ptr]\n\tldp\t@acc[0],@acc[1],[$b_ptr]\n\tldp\t@a[2],  @a[3],  [$a_ptr,#16]\n\tadds\t@a[0],@a[0],@acc[0]\n\tldp\t@acc[2],@acc[3],[$b_ptr,#16]\n\tadcs\t@a[1],@a[1],@acc[1]\n\tldp\t@a[4],  @a[5],  
[$a_ptr,#32]\n\tadcs\t@a[2],@a[2],@acc[2]\n\tldp\t@acc[4],@acc[5],[$b_ptr,#32]\n\tadcs\t@a[3],@a[3],@acc[3]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tadcs\t@a[5],@a[5],@acc[5]\n\tadc\t$bi,xzr,xzr\n\n\tsubs\t@acc[0],@a[0],@mod[0]\n\tsbcs\t@acc[1],@a[1],@mod[1]\n\tsbcs\t@acc[2],@a[2],@mod[2]\n\tsbcs\t@acc[3],@a[3],@mod[3]\n\tsbcs\t@acc[4],@a[4],@mod[4]\n\tsbcs\t@acc[5],@a[5],@mod[5]\n\tsbcs\txzr,$bi,xzr\n\n\tcsel\t@a[0],@a[0],@acc[0],lo\n\tcsel\t@a[1],@a[1],@acc[1],lo\n\tcsel\t@a[2],@a[2],@acc[2],lo\n\tcsel\t@a[3],@a[3],@acc[3],lo\n\tcsel\t@a[4],@a[4],@acc[4],lo\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tcsel\t@a[5],@a[5],@acc[5],lo\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tret\n.size\t__add_mod_384,.-__add_mod_384\n\n.type\t__sub_mod_384,%function\n.align\t5\n__sub_mod_384:\n\tldp\t@a[0],  @a[1],  [$a_ptr]\n\tldp\t@acc[0],@acc[1],[$b_ptr]\n\tldp\t@a[2],  @a[3],  [$a_ptr,#16]\n\tsubs\t@a[0],@a[0],@acc[0]\n\tldp\t@acc[2],@acc[3],[$b_ptr,#16]\n\tsbcs\t@a[1],@a[1],@acc[1]\n\tldp\t@a[4],  @a[5],  [$a_ptr,#32]\n\tsbcs\t@a[2],@a[2],@acc[2]\n\tldp\t@acc[4],@acc[5],[$b_ptr,#32]\n\tsbcs\t@a[3],@a[3],@acc[3]\n\tsbcs\t@a[4],@a[4],@acc[4]\n\tsbcs\t@a[5],@a[5],@acc[5]\n\tsbc\t$bi,xzr,xzr\n\n\t and\t@acc[0],@mod[0],$bi\n\t and\t@acc[1],@mod[1],$bi\n\tadds\t@a[0],@a[0],@acc[0]\n\t and\t@acc[2],@mod[2],$bi\n\tadcs\t@a[1],@a[1],@acc[1]\n\t and\t@acc[3],@mod[3],$bi\n\tadcs\t@a[2],@a[2],@acc[2]\n\t and\t@acc[4],@mod[4],$bi\n\tadcs\t@a[3],@a[3],@acc[3]\n\t 
and\t@acc[5],@mod[5],$bi\n\tadcs\t@a[4],@a[4],@acc[4]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tadc\t@a[5],@a[5],@acc[5]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tret\n.size\t__sub_mod_384,.-__sub_mod_384\n\n.globl\tmul_mont_384x\n.hidden\tmul_mont_384x\n.type\tmul_mont_384x,%function\n.align\t5\nmul_mont_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#288\t\t// space for 3 768-bit vectors\n\n\tcmov\t@tmp[0],$r_ptr\t\t// save r_ptr\n\tcmov\t@tmp[1],$a_ptr\t\t// save a_ptr\n\tcmov\t@tmp[2],$b_ptr\t\t// save b_ptr\n\n\tcadd\t$r_ptr,sp,#0\t\t// mul_384(t0, a->re, b->re)\n\tbl\t__mul_384\n\n\tcadd\t$a_ptr,$a_ptr,#48\t// mul_384(t1, a->im, b->im)\n\tcadd\t$b_ptr,$b_ptr,#48\n\tcadd\t$r_ptr,sp,#96\n\tbl\t__mul_384\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tcsub\t$b_ptr,$a_ptr,#48\n\tcadd\t$r_ptr,sp,#240\n\tbl\t__add_mod_384\n\n\tcadd\t$a_ptr,@tmp[2],#0\n\tcadd\t$b_ptr,@tmp[2],#48\n\tcadd\t$r_ptr,sp,#192\t\t// t2\n\tbl\t__add_mod_384\n\n\tcadd\t$a_ptr,$r_ptr,#0\n\tcadd\t$b_ptr,$r_ptr,#48\n\tbl\t__mul_384\t\t// mul_384(t2, a->re+a->im, b->re+b->im)\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tcmov\t$a_ptr,$r_ptr\n\tcadd\t$b_ptr,sp,#0\n\tbl\t__sub_mod_384x384\n\n\tcadd\t$b_ptr,sp,#96\n\tbl\t__sub_mod_384x384\t// t2 = t2-t0-t1\n\n\tcadd\t$a_ptr,sp,#0\n\tcadd\t$b_ptr,sp,#96\n\tcadd\t$r_ptr,sp,#0\n\tbl\t__sub_mod_384x384\t// t0 = t0-t1\n\n\tcadd\t$a_ptr,sp,#0\t\t// ret->re = redc(t0)\n\tcadd\t$r_ptr,@tmp[0],#0\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\n\tcadd\t$a_ptr,sp,#192\t\t// ret->im = 
redc(t2)\n\tcadd\t$r_ptr,$r_ptr,#48\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#288\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_mont_384x,.-mul_mont_384x\n\n.globl\tsqr_mont_384x\n.hidden\tsqr_mont_384x\n.type\tsqr_mont_384x,%function\n.align\t5\nsqr_mont_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc3,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tcsp,csp,#96\t\t// space for 2 384-bit vectors\n\tmov\t$n0,$n_ptr\t\t// adjust for missing b_ptr\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tcadd\t$b_ptr,$a_ptr,#48\n\tcadd\t$r_ptr,sp,#0\n\tbl\t__add_mod_384\t\t// t0 = a->re + a->im\n\n\tcadd\t$r_ptr,sp,#48\n\tbl\t__sub_mod_384\t\t// t1 = a->re - a->im\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldr\t$bi,        [$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\t@a[0],@a[0],@a[0]\t// add with 
itself\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t@acc[6],xzr,xzr\n\n\tsubs\t@acc[0],@a[0],@mod[0]\n\tsbcs\t@acc[1],@a[1],@mod[1]\n\tsbcs\t@acc[2],@a[2],@mod[2]\n\tsbcs\t@acc[3],@a[3],@mod[3]\n\tsbcs\t@acc[4],@a[4],@mod[4]\n\tsbcs\t@acc[5],@a[5],@mod[5]\n\tsbcs\txzr,@acc[6],xzr\n\n\tcsel\t@acc[0],@a[0],@acc[0],lo\n\tcsel\t@acc[1],@a[1],@acc[1],lo\n\tcsel\t@acc[2],@a[2],@acc[2],lo\n\t ldp\t@a[0],@a[1],[sp]\n\tcsel\t@acc[3],@a[3],@acc[3],lo\n\t ldr\t$bi,        [sp,#48]\n\tcsel\t@acc[4],@a[4],@acc[4],lo\n\t ldp\t@a[2],@a[3],[sp,#16]\n\tcsel\t@acc[5],@a[5],@acc[5],lo\n\t ldp\t@a[4],@a[5],[sp,#32]\n\n\tstp\t@acc[0],@acc[1],[$b_ptr,#48]\n\tstp\t@acc[2],@acc[3],[$b_ptr,#64]\n\tstp\t@acc[4],@acc[5],[$b_ptr,#80]\n\n\tcadd\t$b_ptr,sp,#48\n\tbl\t__mul_mont_384\t\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$b_ptr]\n\tstp\t@a[2],@a[3],[$b_ptr,#16]\n\tstp\t@a[4],@a[5],[$b_ptr,#32]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_mont_384x,.-sqr_mont_384x\n\n.globl\tmul_mont_384\n.hidden\tmul_mont_384\n.type\tmul_mont_384,%function\n.align\t5\nmul_mont_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc4,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldr\t$bi,        
[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tbl\t__mul_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$b_ptr]\n\tstp\t@a[2],@a[3],[$b_ptr,#16]\n\tstp\t@a[4],@a[5],[$b_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_mont_384,.-mul_mont_384\n\n.type\t__mul_mont_384,%function\n.align\t5\n__mul_mont_384:\n\tmul\t@acc[0],@a[0],$bi\n\tmul\t@acc[1],@a[1],$bi\n\tmul\t@acc[2],@a[2],$bi\n\tmul\t@acc[3],@a[3],$bi\n\tmul\t@acc[4],@a[4],$bi\n\tmul\t@acc[5],@a[5],$bi\n\tmul\t$n0,$n0,@acc[0]\n\n\t umulh\t@tmp[0],@a[0],$bi\n\t umulh\t@tmp[1],@a[1],$bi\n\t umulh\t@tmp[2],@a[2],$bi\n\t umulh\t@tmp[3],@a[3],$bi\n\t umulh\t@tmp[4],@a[4],$bi\n\t umulh\t@tmp[5],@a[5],$bi\n\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\t// mul\t@tmp[0],@mod[0],$n0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$n0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$n0\n\t adcs\t@acc[4],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@mod[3],$n0\n\t adcs\t@acc[5],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@mod[4],$n0\n\t adc\t@acc[6],xzr,    @tmp[5]\n\tmul\t@tmp[5],@mod[5],$n0\n\t mov\t$bi,xzr\n___\nfor ($i=1;$i<6;$i++) {\n$code.=<<___;\n\tsubs\txzr,@acc[0],#1\t\t// adds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$n0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$n0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$n0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$n0\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@mod[4],$n0\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t 
umulh\t@tmp[5],@mod[5],$n0\n\tadcs\t@acc[6],@acc[6],xzr\n\tadc\t$n0,$bi,xzr\n\tldr\t$bi,[$b_ptr,8*$i]\n\n\t adds\t@acc[0],@acc[1],@tmp[0]\n\tmul\t@tmp[0],@a[0],$bi\n\t adcs\t@acc[1],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@a[1],$bi\n\t adcs\t@acc[2],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@a[2],$bi\n\t adcs\t@acc[3],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@a[3],$bi\n\t adcs\t@acc[4],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@a[4],$bi\n\t adcs\t@acc[5],@acc[6],@tmp[5]\n\tmul\t@tmp[5],@a[5],$bi\n\t adc\t@acc[6],$n0,xzr\n\tldr\t$n0,[x29,#12*__SIZEOF_POINTER__]\n\n\tadds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@a[0],$bi\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@a[1],$bi\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\tmul\t$n0,$n0,@acc[0]\n\t umulh\t@tmp[2],@a[2],$bi\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@a[3],$bi\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@a[4],$bi\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t umulh\t@tmp[5],@a[5],$bi\n\tadcs\t@acc[6],@acc[6],xzr\n\tadc\t$bi,xzr,xzr\n\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\t// mul\t@tmp[0],@mod[0],$n0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$n0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$n0\n\t adcs\t@acc[4],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@mod[3],$n0\n\t adcs\t@acc[5],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@mod[4],$n0\n\t adcs\t@acc[6],@acc[6],@tmp[5]\n\tmul\t@tmp[5],@mod[5],$n0\n\t adc\t$bi,$bi,xzr\n___\n}\n$code.=<<___;\n\tsubs\txzr,@acc[0],#1\t\t// adds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$n0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$n0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$n0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$n0\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@mod[4],$n0\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t umulh\t@tmp[5],@mod[5],$n0\n\tadcs\t@acc[6],@acc[6],xzr\n\t ldp\tc4,c2,[c29,#12*__SIZEOF_POINTER__]\t// pull r_ptr\n\tadc\t$bi,$bi,xzr\n\n\t adds\t@acc[0],@acc[1],@tmp[0]\n\t adcs\t@acc[1],@acc[2],@tmp[1]\n\t 
adcs\t@acc[2],@acc[3],@tmp[2]\n\t adcs\t@acc[3],@acc[4],@tmp[3]\n\t adcs\t@acc[4],@acc[5],@tmp[4]\n\t adcs\t@acc[5],@acc[6],@tmp[5]\n\t adc\t@acc[6],$bi,xzr\n\n\tsubs\t@tmp[0],@acc[0],@mod[0]\n\tsbcs\t@tmp[1],@acc[1],@mod[1]\n\tsbcs\t@tmp[2],@acc[2],@mod[2]\n\tsbcs\t@tmp[3],@acc[3],@mod[3]\n\tsbcs\t@tmp[4],@acc[4],@mod[4]\n\tsbcs\t@tmp[5],@acc[5],@mod[5]\n\tsbcs\txzr,    @acc[6],xzr\n\n\tcsel\t@a[0],@acc[0],@tmp[0],lo\n\tcsel\t@a[1],@acc[1],@tmp[1],lo\n\tcsel\t@a[2],@acc[2],@tmp[2],lo\n\tcsel\t@a[3],@acc[3],@tmp[3],lo\n\tcsel\t@a[4],@acc[4],@tmp[4],lo\n\tcsel\t@a[5],@acc[5],@tmp[5],lo\n\tret\n.size\t__mul_mont_384,.-__mul_mont_384\n\n.globl\tsqr_mont_384\n.hidden\tsqr_mont_384\n.type\tsqr_mont_384,%function\n.align\t5\nsqr_mont_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#96\t\t// space for 768-bit vector\n\tcmov\t$n0,$n_ptr\t\t// adjust for missing b_ptr\n\n\tcmov\t$n_ptr,$r_ptr\t\t// save r_ptr\n\tcmov\t$r_ptr,sp\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tbl\t__sqr_384\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tcmov\t$a_ptr,sp\n\tcmov\t$r_ptr,$n_ptr\t\t// restore 
r_ptr\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_mont_384,.-sqr_mont_384\n\n.globl\tsqr_n_mul_mont_383\n.hidden\tsqr_n_mul_mont_383\n.type\tsqr_n_mul_mont_383,%function\n.align\t5\nsqr_n_mul_mont_383:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc4,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tcsp,csp,#96\t\t// space for 768-bit vector\n\tcmov\t$bi,x5\t\t\t// save b_ptr\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tcmov\t$r_ptr,sp\n.Loop_sqr_383:\n\tbl\t__sqr_384\n\tsub\t$b_ptr,$b_ptr,#1\t// counter\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tcmov\t$a_ptr,sp\n\tbl\t__mul_by_1_mont_384\n\n\tldp\t@acc[0],@acc[1],[$a_ptr,#48]\n\tldp\t@acc[2],@acc[3],[$a_ptr,#64]\n\tldp\t@acc[4],@acc[5],[$a_ptr,#80]\n\n\tadds\t@a[0],@a[0],@acc[0]\t// just accumulate upper 
half\n\tadcs\t@a[1],@a[1],@acc[1]\n\tadcs\t@a[2],@a[2],@acc[2]\n\tadcs\t@a[3],@a[3],@acc[3]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tadc\t@a[5],@a[5],@acc[5]\n\n\tcbnz\t$b_ptr,.Loop_sqr_383\n\n\tcmov\t$b_ptr,$bi\n\tldr\t$bi,[$bi]\n\tbl\t__mul_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tstp\t@a[0],@a[1],[$b_ptr]\n\tstp\t@a[2],@a[3],[$b_ptr,#16]\n\tstp\t@a[4],@a[5],[$b_ptr,#32]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_n_mul_mont_383,.-sqr_n_mul_mont_383\n___\n{\nmy @acc=(@acc,@tmp[0..2]);\n\n$code.=<<___;\n.type\t__sqr_384,%function\n.align\t5\n__sqr_384:\n\tmul\t@acc[0],@a[1],@a[0]\n\tmul\t@acc[1],@a[2],@a[0]\n\tmul\t@acc[2],@a[3],@a[0]\n\tmul\t@acc[3],@a[4],@a[0]\n\tmul\t@acc[4],@a[5],@a[0]\n\n\t umulh\t@mod[1],@a[1],@a[0]\n\t umulh\t@mod[2],@a[2],@a[0]\n\t umulh\t@mod[3],@a[3],@a[0]\n\t umulh\t@mod[4],@a[4],@a[0]\n\t adds\t@acc[1],@acc[1],@mod[1]\n\t umulh\t@mod[5],@a[5],@a[0]\n\t adcs\t@acc[2],@acc[2],@mod[2]\n\tmul\t@mod[2],@a[2],@a[1]\n\t adcs\t@acc[3],@acc[3],@mod[3]\n\tmul\t@mod[3],@a[3],@a[1]\n\t adcs\t@acc[4],@acc[4],@mod[4]\n\tmul\t@mod[4],@a[4],@a[1]\n\t adc\t@acc[5],xzr,    @mod[5]\n\tmul\t@mod[5],@a[5],@a[1]\n\n\tadds\t@acc[2],@acc[2],@mod[2]\n\t umulh\t@mod[2],@a[2],@a[1]\n\tadcs\t@acc[3],@acc[3],@mod[3]\n\t umulh\t@mod[3],@a[3],@a[1]\n\tadcs\t@acc[4],@acc[4],@mod[4]\n\t umulh\t@mod[4],@a[4],@a[1]\n\tadcs\t@acc[5],@acc[5],@mod[5]\n\t umulh\t@mod[5],@a[5],@a[1]\n\tadc\t@acc[6],xzr,xzr\n\n\t  mul\t@mod[0],@a[0],@a[0]\n\t adds\t@acc[3],@acc[3],@mod[2]\n\t  umulh\t@a[0],  @a[0],@a[0]\n\t adcs\t@acc[4],@acc[4],@mod[3]\n\tmul\t@mod[3],@a[3],@a[2]\n\t adcs\t@acc[5],@acc[5],@mod[4]\n\tmul\t@mod[4],@a[4],@a[2]\n\t 
adc\t@acc[6],@acc[6],@mod[5]\n\tmul\t@mod[5],@a[5],@a[2]\n\n\tadds\t@acc[4],@acc[4],@mod[3]\n\t umulh\t@mod[3],@a[3],@a[2]\n\tadcs\t@acc[5],@acc[5],@mod[4]\n\t umulh\t@mod[4],@a[4],@a[2]\n\tadcs\t@acc[6],@acc[6],@mod[5]\n\t umulh\t@mod[5],@a[5],@a[2]\n\tadc\t@acc[7],xzr,xzr\n\n\t  mul\t@mod[1],@a[1],@a[1]\n\t adds\t@acc[5],@acc[5],@mod[3]\n\t  umulh\t@a[1],  @a[1],@a[1]\n\t adcs\t@acc[6],@acc[6],@mod[4]\n\tmul\t@mod[4],@a[4],@a[3]\n\t adc\t@acc[7],@acc[7],@mod[5]\n\tmul\t@mod[5],@a[5],@a[3]\n\n\tadds\t@acc[6],@acc[6],@mod[4]\n\t umulh\t@mod[4],@a[4],@a[3]\n\tadcs\t@acc[7],@acc[7],@mod[5]\n\t umulh\t@mod[5],@a[5],@a[3]\n\tadc\t@acc[8],xzr,xzr\n\t  mul\t@mod[2],@a[2],@a[2]\n\t adds\t@acc[7],@acc[7],@mod[4]\n\t  umulh\t@a[2],  @a[2],@a[2]\n\t adc\t@acc[8],@acc[8],@mod[5]\n\t  mul\t@mod[3],@a[3],@a[3]\n\n\tmul\t@mod[5],@a[5],@a[4]\n\t  umulh\t@a[3],  @a[3],@a[3]\n\tadds\t@acc[8],@acc[8],@mod[5]\n\t umulh\t@mod[5],@a[5],@a[4]\n\t  mul\t@mod[4],@a[4],@a[4]\n\tadc\t@acc[9],@mod[5],xzr\n\n\tadds\t@acc[0],@acc[0],@acc[0]\n\tadcs\t@acc[1],@acc[1],@acc[1]\n\tadcs\t@acc[2],@acc[2],@acc[2]\n\tadcs\t@acc[3],@acc[3],@acc[3]\n\tadcs\t@acc[4],@acc[4],@acc[4]\n\tadcs\t@acc[5],@acc[5],@acc[5]\n\tadcs\t@acc[6],@acc[6],@acc[6]\n\tadcs\t@acc[7],@acc[7],@acc[7]\n\t  umulh\t@a[4],  @a[4],@a[4]\n\tadcs\t@acc[8],@acc[8],@acc[8]\n\t  mul\t@mod[5],@a[5],@a[5]\n\tadcs\t@acc[9],@acc[9],@acc[9]\n\t  umulh\t@a[5],  
@a[5],@a[5]\n\tadc\t$a_ptr,xzr,xzr\n\n\tadds\t@acc[0],@acc[0],@a[0]\n\tadcs\t@acc[1],@acc[1],@mod[1]\n\tadcs\t@acc[2],@acc[2],@a[1]\n\tadcs\t@acc[3],@acc[3],@mod[2]\n\tadcs\t@acc[4],@acc[4],@a[2]\n\tadcs\t@acc[5],@acc[5],@mod[3]\n\tadcs\t@acc[6],@acc[6],@a[3]\n\tstp\t@mod[0],@acc[0],[$r_ptr]\n\tadcs\t@acc[7],@acc[7],@mod[4]\n\tstp\t@acc[1],@acc[2],[$r_ptr,#16]\n\tadcs\t@acc[8],@acc[8],@a[4]\n\tstp\t@acc[3],@acc[4],[$r_ptr,#32]\n\tadcs\t@acc[9],@acc[9],@mod[5]\n\tstp\t@acc[5],@acc[6],[$r_ptr,#48]\n\tadc\t@a[5],@a[5],$a_ptr\n\tstp\t@acc[7],@acc[8],[$r_ptr,#64]\n\tstp\t@acc[9],@a[5],[$r_ptr,#80]\n\n\tret\n.size\t__sqr_384,.-__sqr_384\n___\n}\n$code.=<<___;\n.globl\tsqr_384\n.hidden\tsqr_384\n.type\tsqr_384,%function\n.align\t5\nsqr_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tbl\t__sqr_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_384,.-sqr_384\n\n.globl\tredc_mont_384\n.hidden\tredc_mont_384\n.type\tredc_mont_384,%function\n.align\t5\nredc_mont_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tmov\t$n0,$n_ptr\t\t// adjust for missing 
b_ptr\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tbl\t__redc_tail_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tredc_mont_384,.-redc_mont_384\n\n.globl\tfrom_mont_384\n.hidden\tfrom_mont_384\n.type\tfrom_mont_384,%function\n.align\t5\nfrom_mont_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tmov\t$n0,$n_ptr\t\t// adjust for missing b_ptr\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tsubs\t@acc[0],@a[0],@mod[0]\n\tsbcs\t@acc[1],@a[1],@mod[1]\n\tsbcs\t@acc[2],@a[2],@mod[2]\n\tsbcs\t@acc[3],@a[3],@mod[3]\n\tsbcs\t@acc[4],@a[4],@mod[4]\n\tsbcs\t@acc[5],@a[5],@mod[5]\n\n\tcsel\t@a[0],@a[0],@acc[0],lo\n\tcsel\t@a[1],@a[1],@acc[1],lo\n\tcsel\t@a[2],@a[2],@acc[2],lo\n\tcsel\t@a[3],@a[3],@acc[3],lo\n\tcsel\t@a[4],@a[4],@acc[4],lo\n\tcsel\t@a[5],@a[5],@acc[5],lo\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tfrom_mont_384,.-from_mont_384\n\n.type\t__mul_by_1_mont_384,%function
\n.align\t5\n__mul_by_1_mont_384:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tmul\t@tmp[0],$n0,@a[0]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\t// mul\t@acc[0],@mod[0],@tmp[0]\n\tmul\t@acc[1],@mod[1],@tmp[0]\n\tmul\t@acc[2],@mod[2],@tmp[0]\n\tmul\t@acc[3],@mod[3],@tmp[0]\n\tmul\t@acc[4],@mod[4],@tmp[0]\n\tmul\t@acc[5],@mod[5],@tmp[0]\n\tsubs\txzr,@a[0],#1\t\t// adds\t@acc[0],@acc[0],@a[0]\n\t umulh\t@a[0],@mod[0],@tmp[0]\n\tadcs\t@acc[1],@acc[1],@a[1]\n\t umulh\t@a[1],@mod[1],@tmp[0]\n\tadcs\t@acc[2],@acc[2],@a[2]\n\t umulh\t@a[2],@mod[2],@tmp[0]\n\tadcs\t@acc[3],@acc[3],@a[3]\n\t umulh\t@a[3],@mod[3],@tmp[0]\n\tadcs\t@acc[4],@acc[4],@a[4]\n\t umulh\t@a[4],@mod[4],@tmp[0]\n\tadcs\t@acc[5],@acc[5],@a[5]\n\t umulh\t@a[5],@mod[5],@tmp[0]\n\tadc\t@acc[6],xzr,xzr\n___\nfor ($i=1;$i<6;$i++) {\n$code.=<<___;\n\t adds\t@a[0],@a[0],@acc[1]\n\t adcs\t@a[1],@a[1],@acc[2]\n\t adcs\t@a[2],@a[2],@acc[3]\n\tmul\t@tmp[0],$n0,@a[0]\n\t adcs\t@a[3],@a[3],@acc[4]\n\t adcs\t@a[4],@a[4],@acc[5]\n\t adc\t@a[5],@a[5],@acc[6]\n\n\t// mul\t@acc[0],@mod[0],@tmp[0]\n\tmul\t@acc[1],@mod[1],@tmp[0]\n\tmul\t@acc[2],@mod[2],@tmp[0]\n\tmul\t@acc[3],@mod[3],@tmp[0]\n\tmul\t@acc[4],@mod[4],@tmp[0]\n\tmul\t@acc[5],@mod[5],@tmp[0]\n\tsubs\txzr,@a[0],#1\t\t// adds\t@acc[0],@acc[0],@a[0]\n\t umulh\t@a[0],@mod[0],@tmp[0]\n\tadcs\t@acc[1],@acc[1],@a[1]\n\t umulh\t@a[1],@mod[1],@tmp[0]\n\tadcs\t@acc[2],@acc[2],@a[2]\n\t umulh\t@a[2],@mod[2],@tmp[0]\n\tadcs\t@acc[3],@acc[3],@a[3]\n\t umulh\t@a[3],@mod[3],@tmp[0]\n\tadcs\t@acc[4],@acc[4],@a[4]\n\t umulh\t@a[4],@mod[4],@tmp[0]\n\tadcs\t@acc[5],@acc[5],@a[5]\n\t 
umulh\t@a[5],@mod[5],@tmp[0]\n\tadc\t@acc[6],xzr,xzr\n___\n}\n$code.=<<___;\n\tadds\t@a[0],@a[0],@acc[1]\n\tadcs\t@a[1],@a[1],@acc[2]\n\tadcs\t@a[2],@a[2],@acc[3]\n\tadcs\t@a[3],@a[3],@acc[4]\n\tadcs\t@a[4],@a[4],@acc[5]\n\tadc\t@a[5],@a[5],@acc[6]\n\n\tret\n.size\t__mul_by_1_mont_384,.-__mul_by_1_mont_384\n\n.type\t__redc_tail_mont_384,%function\n.align\t5\n__redc_tail_mont_384:\n\tldp\t@acc[0],@acc[1],[$a_ptr,#48]\n\tldp\t@acc[2],@acc[3],[$a_ptr,#64]\n\tldp\t@acc[4],@acc[5],[$a_ptr,#80]\n\n\tadds\t@a[0],@a[0],@acc[0]\t// accumulate upper half\n\tadcs\t@a[1],@a[1],@acc[1]\n\tadcs\t@a[2],@a[2],@acc[2]\n\tadcs\t@a[3],@a[3],@acc[3]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tadcs\t@a[5],@a[5],@acc[5]\n\tadc\t@acc[6],xzr,xzr\n\n\tsubs\t@acc[0],@a[0],@mod[0]\n\tsbcs\t@acc[1],@a[1],@mod[1]\n\tsbcs\t@acc[2],@a[2],@mod[2]\n\tsbcs\t@acc[3],@a[3],@mod[3]\n\tsbcs\t@acc[4],@a[4],@mod[4]\n\tsbcs\t@acc[5],@a[5],@mod[5]\n\tsbcs\txzr,@acc[6],xzr\n\n\tcsel\t@a[0],@a[0],@acc[0],lo\n\tcsel\t@a[1],@a[1],@acc[1],lo\n\tcsel\t@a[2],@a[2],@acc[2],lo\n\tcsel\t@a[3],@a[3],@acc[3],lo\n\tcsel\t@a[4],@a[4],@acc[4],lo\n\tcsel\t@a[5],@a[5],@acc[5],lo\n\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\n\tret\n.size\t__redc_tail_mont_384,.-__redc_tail_mont_384\n\n.globl\tmul_384\n.hidden\tmul_384\n.type\tmul_384,%function\n.align\t5\nmul_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tbl\t__mul_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n
\tret\n.size\tmul_384,.-mul_384\n\n.type\t__mul_384,%function\n.align\t5\n__mul_384:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldr\t$bi,        [$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tmul\t@acc[0],@a[0],$bi\n\tmul\t@acc[1],@a[1],$bi\n\tmul\t@acc[2],@a[2],$bi\n\tmul\t@acc[3],@a[3],$bi\n\tmul\t@acc[4],@a[4],$bi\n\tmul\t@acc[5],@a[5],$bi\n\n\t umulh\t@mod[0],@a[0],$bi\n\t umulh\t@mod[1],@a[1],$bi\n\t umulh\t@mod[2],@a[2],$bi\n\t umulh\t@mod[3],@a[3],$bi\n\t umulh\t@mod[4],@a[4],$bi\n\t umulh\t@mod[5],@a[5],$bi\n\tldr\t$bi,[$b_ptr,8*1]\n\n\tstr\t@acc[0],[$r_ptr]\n\t adds\t@acc[0],@acc[1],@mod[0]\n\tmul\t@mod[0],@a[0],$bi\n\t adcs\t@acc[1],@acc[2],@mod[1]\n\tmul\t@mod[1],@a[1],$bi\n\t adcs\t@acc[2],@acc[3],@mod[2]\n\tmul\t@mod[2],@a[2],$bi\n\t adcs\t@acc[3],@acc[4],@mod[3]\n\tmul\t@mod[3],@a[3],$bi\n\t adcs\t@acc[4],@acc[5],@mod[4]\n\tmul\t@mod[4],@a[4],$bi\n\t adc\t@acc[5],xzr,    @mod[5]\n\tmul\t@mod[5],@a[5],$bi\n___\nfor ($i=1;$i<5;$i++) {\n$code.=<<___;\n\tadds\t@acc[0],@acc[0],@mod[0]\n\t umulh\t@mod[0],@a[0],$bi\n\tadcs\t@acc[1],@acc[1],@mod[1]\n\t umulh\t@mod[1],@a[1],$bi\n\tadcs\t@acc[2],@acc[2],@mod[2]\n\t umulh\t@mod[2],@a[2],$bi\n\tadcs\t@acc[3],@acc[3],@mod[3]\n\t umulh\t@mod[3],@a[3],$bi\n\tadcs\t@acc[4],@acc[4],@mod[4]\n\t umulh\t@mod[4],@a[4],$bi\n\tadcs\t@acc[5],@acc[5],@mod[5]\n\t umulh\t@mod[5],@a[5],$bi\n\tldr\t$bi,[$b_ptr,#8*($i+1)]\n\tadc\t@acc[6],xzr,xzr\n\n\tstr\t@acc[0],[$r_ptr,8*$i]\n\t adds\t@acc[0],@acc[1],@mod[0]\n\tmul\t@mod[0],@a[0],$bi\n\t adcs\t@acc[1],@acc[2],@mod[1]\n\tmul\t@mod[1],@a[1],$bi\n\t adcs\t@acc[2],@acc[3],@mod[2]\n\tmul\t@mod[2],@a[2],$bi\n\t adcs\t@acc[3],@acc[4],@mod[3]\n\tmul\t@mod[3],@a[3],$bi\n\t adcs\t@acc[4],@acc[5],@mod[4]\n\tmul\t@mod[4],@a[4],$bi\n\t adc\t@acc[5],@acc[6],@mod[5]\n\tmul\t@mod[5],@a[5],$bi\n___\n}\n$code.=<<___;\n\tadds\t@acc[0],@acc[0],@mod[0]\n\t umulh\t@mod[0],@a[0],$bi\n\tadcs\t@acc[1],@acc[1],@mod[1]\n\t umulh\t@mod[1],@a[1],$bi\n\tadcs\t@acc[2],@acc[2],@mod[2]\n\t 
umulh\t@mod[2],@a[2],$bi\n\tadcs\t@acc[3],@acc[3],@mod[3]\n\t umulh\t@mod[3],@a[3],$bi\n\tadcs\t@acc[4],@acc[4],@mod[4]\n\t umulh\t@mod[4],@a[4],$bi\n\tadcs\t@acc[5],@acc[5],@mod[5]\n\t umulh\t@mod[5],@a[5],$bi\n\tadc\t@acc[6],xzr,xzr\n\n\tstr\t@acc[0],[$r_ptr,8*$i]\n\t adds\t@acc[0],@acc[1],@mod[0]\n\t adcs\t@acc[1],@acc[2],@mod[1]\n\t adcs\t@acc[2],@acc[3],@mod[2]\n\t adcs\t@acc[3],@acc[4],@mod[3]\n\t adcs\t@acc[4],@acc[5],@mod[4]\n\t adc\t@acc[5],@acc[6],@mod[5]\n\n\tstp\t@acc[0],@acc[1],[$r_ptr,#48]\n\tstp\t@acc[2],@acc[3],[$r_ptr,#64]\n\tstp\t@acc[4],@acc[5],[$r_ptr,#80]\n\n\tret\n.size\t__mul_384,.-__mul_384\n\n.globl\tmul_382x\n.hidden\tmul_382x\n.type\tmul_382x,%function\n.align\t5\nmul_382x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tsub\tcsp,csp,#96\t\t// space for two 384-bit vectors\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tcmov\t@tmp[0],$r_ptr\t\t// save r_ptr\n\tldp\t@acc[0],@acc[1],[$a_ptr,#48]\n\tcmov\t@tmp[1],$a_ptr\t\t// save a_ptr\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tcmov\t@tmp[2],$b_ptr\t\t// save b_ptr\n\tldp\t@acc[2],@acc[3],[$a_ptr,#64]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tadds\t@mod[0],$a[0],@acc[0]\t// t0 = a->re + a->im\n\tldp\t@acc[4],@acc[5],[$a_ptr,#80]\n\tadcs\t@mod[1],$a[1],@acc[1]\n\t ldp\t@a[0],@a[1],[$b_ptr]\n\tadcs\t@mod[2],$a[2],@acc[2]\n\t ldp\t@acc[0],@acc[1],[$b_ptr,#48]\n\tadcs\t@mod[3],$a[3],@acc[3]\n\t ldp\t@a[2],@a[3],[$b_ptr,#16]\n\tadcs\t@mod[4],$a[4],@acc[4]\n\t ldp\t@acc[2],@acc[3],[$b_ptr,#64]\n\tadc\t@mod[5],$a[5],@acc[5]\n\t ldp\t@a[4],@a[5],[$b_ptr,#32]\n\n\tstp\t@mod[0],@mod[1],[sp]\n\t adds\t@mod[0],$a[0],@acc[0]\t// t1 = b->re + b->im\n\t ldp\t@acc[4],@acc[5],[$b_ptr,#80]\n\t adcs\t@mod[1],$a[1],@acc[1]\n\tstp\t@mod[2],@mod[3],[sp,#16]\n\t 
adcs\t@mod[2],$a[2],@acc[2]\n\t adcs\t@mod[3],$a[3],@acc[3]\n\t stp\t@mod[4],@mod[5],[sp,#32]\n\t adcs\t@mod[4],$a[4],@acc[4]\n\t stp\t@mod[0],@mod[1],[sp,#48]\n\t adc\t@mod[5],$a[5],@acc[5]\n\t stp\t@mod[2],@mod[3],[sp,#64]\n\t stp\t@mod[4],@mod[5],[sp,#80]\n\n\tbl\t__mul_384\t\t// mul_384(ret->re, a->re, b->re)\n\n\tcadd\t$a_ptr,sp,#0\t\t// mul_384(ret->im, t0, t1)\n\tcadd\t$b_ptr,sp,#48\n\tcadd\t$r_ptr,@tmp[0],#96\n\tbl\t__mul_384\n\n\tcadd\t$a_ptr,@tmp[1],#48\t// mul_384(tx, a->im, b->im)\n\tcadd\t$b_ptr,@tmp[2],#48\n\tcadd\t$r_ptr,sp,#0\n\tbl\t__mul_384\n\n\tldp\t@mod[0],@mod[1],[$n_ptr]\n\tldp\t@mod[2],@mod[3],[$n_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$n_ptr,#32]\n\n\tcadd\t$a_ptr,@tmp[0],#96\t// ret->im -= tx\n\tcadd\t$b_ptr,sp,#0\n\tcadd\t$r_ptr,@tmp[0],#96\n\tbl\t__sub_mod_384x384\n\n\tcadd\t$b_ptr,@tmp[0],#0\t// ret->im -= ret->re\n\tbl\t__sub_mod_384x384\n\n\tcadd\t$a_ptr,@tmp[0],#0\t// ret->re -= tx\n\tcadd\t$b_ptr,sp,#0\n\tcadd\t$r_ptr,@tmp[0],#0\n\tbl\t__sub_mod_384x384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tadd\tcsp,csp,#96\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tmul_382x,.-mul_382x\n\n.globl\tsqr_382x\n.hidden\tsqr_382x\n.type\tsqr_382x,%function\n.align\t5\nsqr_382x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@acc[0],@acc[1],[$a_ptr,#48]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tadds\t@mod[0],$a[0],@acc[0]\t// t0 = a->re + 
a->im\n\tldp\t@acc[2],@acc[3],[$a_ptr,#64]\n\tadcs\t@mod[1],$a[1],@acc[1]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tadcs\t@mod[2],$a[2],@acc[2]\n\tldp\t@acc[4],@acc[5],[$a_ptr,#80]\n\tadcs\t@mod[3],$a[3],@acc[3]\n\tstp\t@mod[0],@mod[1],[$r_ptr]\n\tadcs\t@mod[4],$a[4],@acc[4]\n\t ldp\t@mod[0],@mod[1],[$b_ptr]\n\tadc\t@mod[5],$a[5],@acc[5]\n\tstp\t@mod[2],@mod[3],[$r_ptr,#16]\n\n\tsubs\t@a[0],$a[0],@acc[0]\t// t1 = a->re - a->im\n\t ldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tsbcs\t@a[1],$a[1],@acc[1]\n\tstp\t@mod[4],@mod[5],[$r_ptr,#32]\n\tsbcs\t@a[2],$a[2],@acc[2]\n\t ldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\tsbcs\t@a[3],$a[3],@acc[3]\n\tsbcs\t@a[4],$a[4],@acc[4]\n\tsbcs\t@a[5],$a[5],@acc[5]\n\tsbc\t@acc[6],xzr,xzr\n\n\t and\t@acc[0],@mod[0],@acc[6]\n\t and\t@acc[1],@mod[1],@acc[6]\n\tadds\t@a[0],@a[0],@acc[0]\n\t and\t@acc[2],@mod[2],@acc[6]\n\tadcs\t@a[1],@a[1],@acc[1]\n\t and\t@acc[3],@mod[3],@acc[6]\n\tadcs\t@a[2],@a[2],@acc[2]\n\t and\t@acc[4],@mod[4],@acc[6]\n\tadcs\t@a[3],@a[3],@acc[3]\n\t and\t@acc[5],@mod[5],@acc[6]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tstp\t@a[0],@a[1],[$r_ptr,#48]\n\tadc\t@a[5],@a[5],@acc[5]\n\tstp\t@a[2],@a[3],[$r_ptr,#64]\n\tstp\t@a[4],@a[5],[$r_ptr,#80]\n\n\tcmov\t$n0,$a_ptr\t\t// save a_ptr\n\tcadd\t$a_ptr,$r_ptr,#0\t// mul_384(ret->re, t0, t1)\n\tcadd\t$b_ptr,$r_ptr,#48\n\tbl\t__mul_384\n\n\tcadd\t$a_ptr,$n0,#0\t\t// mul_384(ret->im, a->re, a->im)\n\tcadd\t$b_ptr,$n0,#48\n\tcadd\t$r_ptr,$r_ptr,#96\n\tbl\t__mul_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldp\t@a[0],@a[1],[$r_ptr]\n\tldp\t@a[2],@a[3],[$r_ptr,#16]\n\tadds\t@a[0],@a[0],@a[0]\t// add with 
itself\n\tldp\t@a[4],@a[5],[$r_ptr,#32]\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadcs\t@acc[0],@acc[0],@acc[0]\n\tadcs\t@acc[1],@acc[1],@acc[1]\n\tstp\t@a[0],@a[1],[$r_ptr]\n\tadcs\t@acc[2],@acc[2],@acc[2]\n\tstp\t@a[2],@a[3],[$r_ptr,#16]\n\tadcs\t@acc[3],@acc[3],@acc[3]\n\tstp\t@a[4],@a[5],[$r_ptr,#32]\n\tadcs\t@acc[4],@acc[4],@acc[4]\n\tstp\t@acc[0],@acc[1],[$r_ptr,#48]\n\tadc\t@acc[5],@acc[5],@acc[5]\n\tstp\t@acc[2],@acc[3],[$r_ptr,#64]\n\tstp\t@acc[4],@acc[5],[$r_ptr,#80]\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_382x,.-sqr_382x\n\n.globl\tsqr_mont_382x\n.hidden\tsqr_mont_382x\n.type\tsqr_mont_382x,%function\n.align\t5\nsqr_mont_382x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\tstp\tc3,c0,[csp,#12*__SIZEOF_POINTER__]\t// __mul_mont_384 wants them there\n\tsub\tcsp,csp,#112\t\t// space for two 384-bit vectors + word\n\tmov\t$n0,$n_ptr\t\t// adjust for missing b_ptr\n\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\n\tldp\t$bi,@acc[1],[$a_ptr,#48]\n\tldp\t@acc[2],@acc[3],[$a_ptr,#64]\n\tldp\t@acc[4],@acc[5],[$a_ptr,#80]\n\n\tadds\t@mod[0],$a[0],$bi\t// t0 = a->re + a->im\n\tadcs\t@mod[1],$a[1],@acc[1]\n\tadcs\t@mod[2],$a[2],@acc[2]\n\tadcs\t@mod[3],$a[3],@acc[3]\n\tadcs\t@mod[4],$a[4],@acc[4]\n\tadc\t@mod[5],$a[5],@acc[5]\n\n\tsubs\t@acc[0],$a[0],$bi\t// t1 = a->re - 
a->im\n\tsbcs\t@acc[1],$a[1],@acc[1]\n\tsbcs\t@acc[2],$a[2],@acc[2]\n\tsbcs\t@acc[3],$a[3],@acc[3]\n\tsbcs\t@acc[4],$a[4],@acc[4]\n\tsbcs\t@acc[5],$a[5],@acc[5]\n\tsbc\t@acc[6],xzr,xzr\t\t// borrow flag as mask\n\n\tstp\t@mod[0],@mod[1],[sp]\n\tstp\t@mod[2],@mod[3],[sp,#16]\n\tstp\t@mod[4],@mod[5],[sp,#32]\n\tstp\t@acc[0],@acc[1],[sp,#48]\n\tstp\t@acc[2],@acc[3],[sp,#64]\n\tstp\t@acc[4],@acc[5],[sp,#80]\n\tstr\t@acc[6],[sp,#96]\n\n\tldp\t@mod[0],@mod[1],[$b_ptr]\n\tldp\t@mod[2],@mod[3],[$b_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$b_ptr,#32]\n\n\tcadd\t$b_ptr,$a_ptr,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->im, a->re, a->im)\n\n\tadds\t@acc[0],@a[0],@a[0]\t// add with itself\n\tadcs\t@acc[1],@a[1],@a[1]\n\tadcs\t@acc[2],@a[2],@a[2]\n\tadcs\t@acc[3],@a[3],@a[3]\n\tadcs\t@acc[4],@a[4],@a[4]\n\tadc\t@acc[5],@a[5],@a[5]\n\n\tstp\t@acc[0],@acc[1],[$b_ptr,#48]\n\tstp\t@acc[2],@acc[3],[$b_ptr,#64]\n\tstp\t@acc[4],@acc[5],[$b_ptr,#80]\n\n\tldp\t@a[0],@a[1],[sp]\n\tldr\t$bi,[sp,#48]\n\tldp\t@a[2],@a[3],[sp,#16]\n\tldp\t@a[4],@a[5],[sp,#32]\n\n\tcadd\t$b_ptr,sp,#48\n\tbl\t__mul_mont_383_nonred\t// mul_mont_384(ret->re, t0, t1)\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tldr\t@acc[6],[sp,#96]\t// account for sign from a->re - 
a->im\n\tldp\t@acc[0],@acc[1],[sp]\n\tldp\t@acc[2],@acc[3],[sp,#16]\n\tldp\t@acc[4],@acc[5],[sp,#32]\n\n\tand\t@acc[0],@acc[0],@acc[6]\n\tand\t@acc[1],@acc[1],@acc[6]\n\tand\t@acc[2],@acc[2],@acc[6]\n\tand\t@acc[3],@acc[3],@acc[6]\n\tand\t@acc[4],@acc[4],@acc[6]\n\tand\t@acc[5],@acc[5],@acc[6]\n\n\tsubs\t@a[0],@a[0],@acc[0]\n\tsbcs\t@a[1],@a[1],@acc[1]\n\tsbcs\t@a[2],@a[2],@acc[2]\n\tsbcs\t@a[3],@a[3],@acc[3]\n\tsbcs\t@a[4],@a[4],@acc[4]\n\tsbcs\t@a[5],@a[5],@acc[5]\n\tsbc\t@acc[6],xzr,xzr\n\n\tand\t@acc[0],@mod[0],@acc[6]\n\tand\t@acc[1],@mod[1],@acc[6]\n\tand\t@acc[2],@mod[2],@acc[6]\n\tand\t@acc[3],@mod[3],@acc[6]\n\tand\t@acc[4],@mod[4],@acc[6]\n\tand\t@acc[5],@mod[5],@acc[6]\n\n\tadds\t@a[0],@a[0],@acc[0]\n\tadcs\t@a[1],@a[1],@acc[1]\n\tadcs\t@a[2],@a[2],@acc[2]\n\tadcs\t@a[3],@a[3],@acc[3]\n\tadcs\t@a[4],@a[4],@acc[4]\n\tadc\t@a[5],@a[5],@acc[5]\n\n\tstp\t@a[0],@a[1],[$b_ptr]\n\tstp\t@a[2],@a[3],[$b_ptr,#16]\n\tstp\t@a[4],@a[5],[$b_ptr,#32]\n\n\tadd\tcsp,csp,#112\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsqr_mont_382x,.-sqr_mont_382x\n\n.type\t__mul_mont_383_nonred,%function\n.align\t5\n__mul_mont_383_nonred:\n\tmul\t@acc[0],@a[0],$bi\n\tmul\t@acc[1],@a[1],$bi\n\tmul\t@acc[2],@a[2],$bi\n\tmul\t@acc[3],@a[3],$bi\n\tmul\t@acc[4],@a[4],$bi\n\tmul\t@acc[5],@a[5],$bi\n\tmul\t$n0,$n0,@acc[0]\n\n\t umulh\t@tmp[0],@a[0],$bi\n\t umulh\t@tmp[1],@a[1],$bi\n\t umulh\t@tmp[2],@a[2],$bi\n\t umulh\t@tmp[3],@a[3],$bi\n\t umulh\t@tmp[4],@a[4],$bi\n\t umulh\t@tmp[5],@a[5],$bi\n\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\tmul\t@tmp[0],@mod[0],$n0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$n0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$n0\n\t adcs\t@acc[4],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@mod[3],$n0\n\t 
adcs\t@acc[5],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@mod[4],$n0\n\t adc\t@acc[6],xzr,    @tmp[5]\n\tmul\t@tmp[5],@mod[5],$n0\n___\nfor ($i=1;$i<6;$i++) {\n$code.=<<___;\n\tldr\t$bi,[$b_ptr,8*$i]\n\tadds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$n0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@mod[1],$n0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$n0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$n0\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@mod[4],$n0\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t umulh\t@tmp[5],@mod[5],$n0\n\tadc\t@acc[6],@acc[6],xzr\n\n\tldr\t$n0,[x29,#12*__SIZEOF_POINTER__]\n\t adds\t@acc[0],@acc[1],@tmp[0]\n\tmul\t@tmp[0],@a[0],$bi\n\t adcs\t@acc[1],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@a[1],$bi\n\t adcs\t@acc[2],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@a[2],$bi\n\t adcs\t@acc[3],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@a[3],$bi\n\t adcs\t@acc[4],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@a[4],$bi\n\t adcs\t@acc[5],@acc[6],@tmp[5]\n\tmul\t@tmp[5],@a[5],$bi\n\t adc\t@acc[6],xzr,xzr\n\n\tadds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@a[0],$bi\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t umulh\t@tmp[1],@a[1],$bi\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\tmul\t$n0,$n0,@acc[0]\n\t umulh\t@tmp[2],@a[2],$bi\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@a[3],$bi\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@a[4],$bi\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t umulh\t@tmp[5],@a[5],$bi\n\tadc\t@acc[6],@acc[6],xzr\n\n\t adds\t@acc[1],@acc[1],@tmp[0]\n\tmul\t@tmp[0],@mod[0],$n0\n\t adcs\t@acc[2],@acc[2],@tmp[1]\n\tmul\t@tmp[1],@mod[1],$n0\n\t adcs\t@acc[3],@acc[3],@tmp[2]\n\tmul\t@tmp[2],@mod[2],$n0\n\t adcs\t@acc[4],@acc[4],@tmp[3]\n\tmul\t@tmp[3],@mod[3],$n0\n\t adcs\t@acc[5],@acc[5],@tmp[4]\n\tmul\t@tmp[4],@mod[4],$n0\n\t adc\t@acc[6],@acc[6],@tmp[5]\n\tmul\t@tmp[5],@mod[5],$n0\n___\n}\n$code.=<<___;\n\tadds\t@acc[0],@acc[0],@tmp[0]\n\t umulh\t@tmp[0],@mod[0],$n0\n\tadcs\t@acc[1],@acc[1],@tmp[1]\n\t 
umulh\t@tmp[1],@mod[1],$n0\n\tadcs\t@acc[2],@acc[2],@tmp[2]\n\t umulh\t@tmp[2],@mod[2],$n0\n\tadcs\t@acc[3],@acc[3],@tmp[3]\n\t umulh\t@tmp[3],@mod[3],$n0\n\tadcs\t@acc[4],@acc[4],@tmp[4]\n\t umulh\t@tmp[4],@mod[4],$n0\n\tadcs\t@acc[5],@acc[5],@tmp[5]\n\t umulh\t@tmp[5],@mod[5],$n0\n\tadc\t@acc[6],@acc[6],xzr\n\t ldp\tc4,c2,[c29,#12*__SIZEOF_POINTER__]\t\t// pull r_ptr\n\n\t adds\t@a[0],@acc[1],@tmp[0]\n\t adcs\t@a[1],@acc[2],@tmp[1]\n\t adcs\t@a[2],@acc[3],@tmp[2]\n\t adcs\t@a[3],@acc[4],@tmp[3]\n\t adcs\t@a[4],@acc[5],@tmp[4]\n\t adcs\t@a[5],@acc[6],@tmp[5]\n\n\tret\n.size\t__mul_mont_383_nonred,.-__mul_mont_383_nonred\n\n.globl\tsgn0_pty_mont_384\n.hidden\tsgn0_pty_mont_384\n.type\tsgn0_pty_mont_384,%function\n.align\t5\nsgn0_pty_mont_384:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tmov\t$n0,$b_ptr\n\tldp\t@mod[0],@mod[1],[$a_ptr]\n\tldp\t@mod[2],@mod[3],[$a_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$a_ptr,#32]\n\tcmov\t$a_ptr,$r_ptr\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tand\t$r_ptr,@a[0],#1\n\tadds\t@a[0],@a[0],@a[0]\n\tadcs\t@a[1],@a[1],@a[1]\n\tadcs\t@a[2],@a[2],@a[2]\n\tadcs\t@a[3],@a[3],@a[3]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t$bi,xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t$bi,$bi,xzr\n\n\tmvn\t$bi,$bi\n\tand\t$bi,$bi,#2\n\torr\t$r_ptr,$r_ptr,$bi\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\
n\tautiasp\n\tret\n.size\tsgn0_pty_mont_384,.-sgn0_pty_mont_384\n\n.globl\tsgn0_pty_mont_384x\n.hidden\tsgn0_pty_mont_384x\n.type\tsgn0_pty_mont_384x,%function\n.align\t5\nsgn0_pty_mont_384x:\n\tpaciasp\n\tstp\tc29,c30,[csp,#-16*__SIZEOF_POINTER__]!\n\tadd\tc29,csp,#0\n\tstp\tc19,c20,[csp,#2*__SIZEOF_POINTER__]\n\tstp\tc21,c22,[csp,#4*__SIZEOF_POINTER__]\n\tstp\tc23,c24,[csp,#6*__SIZEOF_POINTER__]\n\tstp\tc25,c26,[csp,#8*__SIZEOF_POINTER__]\n\tstp\tc27,c28,[csp,#10*__SIZEOF_POINTER__]\n\n\tmov\t$n0,$b_ptr\n\tldp\t@mod[0],@mod[1],[$a_ptr]\n\tldp\t@mod[2],@mod[3],[$a_ptr,#16]\n\tldp\t@mod[4],@mod[5],[$a_ptr,#32]\n\tcmov\t$a_ptr,$r_ptr\n\n\tbl\t__mul_by_1_mont_384\n\tcadd\t$a_ptr,$a_ptr,#48\n\n\tand\t$b_ptr,@a[0],#1\n\t orr\t$n_ptr,@a[0],@a[1]\n\tadds\t@a[0],@a[0],@a[0]\n\t orr\t$n_ptr,$n_ptr,@a[2]\n\tadcs\t@a[1],@a[1],@a[1]\n\t orr\t$n_ptr,$n_ptr,@a[3]\n\tadcs\t@a[2],@a[2],@a[2]\n\t orr\t$n_ptr,$n_ptr,@a[4]\n\tadcs\t@a[3],@a[3],@a[3]\n\t orr\t$n_ptr,$n_ptr,@a[5]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t$bi,xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t$bi,$bi,xzr\n\n\tmvn\t$bi,$bi\n\tand\t$bi,$bi,#2\n\torr\t$b_ptr,$b_ptr,$bi\n\n\tbl\t__mul_by_1_mont_384\n\tldr\tc30,[c29,#__SIZEOF_POINTER__]\n\n\tand\t$r_ptr,@a[0],#1\n\t orr\t$a_ptr,@a[0],@a[1]\n\tadds\t@a[0],@a[0],@a[0]\n\t orr\t$a_ptr,$a_ptr,@a[2]\n\tadcs\t@a[1],@a[1],@a[1]\n\t orr\t$a_ptr,$a_ptr,@a[3]\n\tadcs\t@a[2],@a[2],@a[2]\n\t orr\t$a_ptr,$a_ptr,@a[4]\n\tadcs\t@a[3],@a[3],@a[3]\n\t 
orr\t$a_ptr,$a_ptr,@a[5]\n\tadcs\t@a[4],@a[4],@a[4]\n\tadcs\t@a[5],@a[5],@a[5]\n\tadc\t$bi,xzr,xzr\n\n\tsubs\t@a[0],@a[0],@mod[0]\n\tsbcs\t@a[1],@a[1],@mod[1]\n\tsbcs\t@a[2],@a[2],@mod[2]\n\tsbcs\t@a[3],@a[3],@mod[3]\n\tsbcs\t@a[4],@a[4],@mod[4]\n\tsbcs\t@a[5],@a[5],@mod[5]\n\tsbc\t$bi,$bi,xzr\n\n\tmvn\t$bi,$bi\n\tand\t$bi,$bi,#2\n\torr\t$r_ptr,$r_ptr,$bi\n\n\tcmp\t$n_ptr,#0\n\tcsel\t$n_ptr,$r_ptr,$b_ptr,eq\t// a->re==0? prty(a->im) : prty(a->re)\n\n\tcmp\t$a_ptr,#0\n\tcsel\t$a_ptr,$r_ptr,$b_ptr,ne\t// a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\t$n_ptr,$n_ptr,#1\n\tand\t$a_ptr,$a_ptr,#2\n\torr\t$r_ptr,$a_ptr,$n_ptr\t\t// pack sign and parity\n\n\tldp\tc19,c20,[c29,#2*__SIZEOF_POINTER__]\n\tldp\tc21,c22,[c29,#4*__SIZEOF_POINTER__]\n\tldp\tc23,c24,[c29,#6*__SIZEOF_POINTER__]\n\tldp\tc25,c26,[c29,#8*__SIZEOF_POINTER__]\n\tldp\tc27,c28,[c29,#10*__SIZEOF_POINTER__]\n\tldr\tc29,[csp],#16*__SIZEOF_POINTER__\n\tautiasp\n\tret\n.size\tsgn0_pty_mont_384x,.-sgn0_pty_mont_384x\n___\n\nif (0) {\nmy @b = ($bi, @mod[0..4]);\nmy @comba = @acc[4..6];\n\n$code.=<<___;\n.type\t__mul_384_comba,%function\n.align\t5\n__mul_384_comba:\n\tldp\t@a[0],@a[1],[$a_ptr]\n\tldp\t@b[0],@b[1],[$b_ptr]\n\tldp\t@a[2],@a[3],[$a_ptr,#16]\n\tldp\t@a[4],@a[5],[$a_ptr,#32]\n\tldp\t@b[2],@b[3],[$b_ptr,#16]\n\tldp\t@b[4],@b[5],[$b_ptr,#32]\n\n\tmul\t@comba[0],@a[0],@b[0]\n\tumulh\t@comba[1],@a[0],@b[0]\n\t mul\t@acc[0],@a[1],@b[0]\n\t umulh\t@acc[1],@a[1],@b[0]\n\tstr\t@comba[0],[$r_ptr]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[2],@a[0],@b[1]\n\tumulh\t@acc[3],@a[0],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],xzr,      
@acc[1]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[0],@a[2],@b[0]\n\tumulh\t@acc[1],@a[2],@b[0]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#8]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[2],@a[1],@b[1]\n\tumulh\t@acc[3],@a[1],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[0],@a[0],@b[2]\n\tumulh\t@acc[1],@a[0],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[2],@a[3],@b[0]\n\t umulh\t@acc[3],@a[3],@b[0]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#16]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[0],@a[2],@b[1]\n\tumulh\t@acc[1],@a[2],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[2],@a[1],@b[2]\n\tumulh\t@acc[3],@a[1],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[0],@a[0],@b[3]\n\tumulh\t@acc[1],@a[0],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[2],@a[4],@b[0]\n\t 
umulh\t@acc[3],@a[4],@b[0]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#24]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[0],@a[3],@b[1]\n\tumulh\t@acc[1],@a[3],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[2],@a[2],@b[2]\n\tumulh\t@acc[3],@a[2],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[0],@a[1],@b[3]\n\tumulh\t@acc[1],@a[1],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[2],@a[0],@b[4]\n\tumulh\t@acc[3],@a[0],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[0],@a[5],@b[0]\n\t umulh\t@acc[1],@a[5],@b[0]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#32]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[2],@a[4],@b[1]\n\tumulh\t@acc[3],@a[4],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[0],@a[3],@b[2]\n\tumulh\t@acc[1],@a[3],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[2],@a[2],@b[3]\n\tumulh\t@acc[3],@a[2],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[0],@a[1],@b[4]\n\tumulh\t@acc[1],@a[1],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[2],@a[0],@b[5]\n\tumulh\t@acc[3],@a[0],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[0],@a[5],@b[1]\n\t 
umulh\t@acc[1],@a[5],@b[1]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#40]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[2],@a[4],@b[2]\n\tumulh\t@acc[3],@a[4],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[0],@a[3],@b[3]\n\tumulh\t@acc[1],@a[3],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[2],@a[2],@b[4]\n\tumulh\t@acc[3],@a[2],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[0],@a[1],@b[5]\n\tumulh\t@acc[1],@a[1],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[2],@a[5],@b[2]\n\t umulh\t@acc[3],@a[5],@b[2]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#48]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[0],@a[4],@b[3]\n\tumulh\t@acc[1],@a[4],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[2],@a[3],@b[4]\n\tumulh\t@acc[3],@a[3],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tmul\t@acc[0],@a[2],@b[5]\n\tumulh\t@acc[1],@a[2],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[2],@a[5],@b[3]\n\t 
umulh\t@acc[3],@a[5],@b[3]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#56]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[0],@a[4],@b[4]\n\tumulh\t@acc[1],@a[4],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],xzr,xzr\n\tmul\t@acc[2],@a[3],@b[5]\n\tumulh\t@acc[3],@a[3],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],@comba[2],xzr\n\t mul\t@acc[0],@a[5],@b[4]\n\t umulh\t@acc[1],@a[5],@b[4]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#64]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tmul\t@acc[2],@a[4],@b[5]\n\tumulh\t@acc[3],@a[4],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadcs\t@comba[1],@comba[1],@acc[1]\n\tadc\t@comba[2],xzr,xzr\n\t mul\t@acc[0],@a[5],@b[5]\n\t umulh\t@acc[1],@a[5],@b[5]\n\tadds\t@comba[0],@comba[0],@acc[2]\n\tadcs\t@comba[1],@comba[1],@acc[3]\n\tadc\t@comba[2],@comba[2],xzr\n\tstr\t@comba[0],[$r_ptr,#72]\n___\n\tpush(@comba,shift(@comba));\n$code.=<<___;\n\tadds\t@comba[0],@comba[0],@acc[0]\n\tadc\t@comba[1],@comba[1],@acc[1]\n\tstp\t@comba[0],@comba[1],[$r_ptr,#80]\n\n\tret\n.size\t__mul_384_comba,.-__mul_384_comba\n___\n}\nprint $code;\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mulq_mont_256-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# As for \"sparse\" in subroutine names, see commentary in the\n# asm/mulx_mont_256-x86_64.pl module.\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.extern\tmul_mont_sparse_256\\$1\n.extern\tsqr_mont_sparse_256\\$1\n.extern\tfrom_mont_256\\$1\n.extern\tredc_mont_256\\$1\n___\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n{ ############################################################## 256 bits\nmy @acc=map(\"%r$_\",(9..15));\n\n{ ############################################################## mulq\nmy ($hi, $a0) = (\"%rbp\", $r_ptr);\n\n$code.=<<___;\n.comm\t__blst_platform_cap,4\n.text\n\n.globl\tmul_mont_sparse_256\n.hidden\tmul_mont_sparse_256\n.type\tmul_mont_sparse_256,\\@function,5,\"unwind\"\n.align\t32\nmul_mont_sparse_256:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tmul_mont_sparse_256\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t8*0($b_org), %rax\n\tmov\t8*0($a_ptr), @acc[4]\n\tmov\t8*1($a_ptr), 
@acc[5]\n\tmov\t8*2($a_ptr), @acc[3]\n\tmov\t8*3($a_ptr), $hi\n\tmov\t$b_org, $b_ptr\t\t# evacuate from %rdx\n\n\tmov\t%rax, @acc[6]\n\tmulq\t@acc[4]\t\t\t# a[0]*b[0]\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[6], %rax\n\tmov\t%rdx, @acc[1]\n\tcall\t__mulq_mont_sparse_256\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_mont_sparse_256,.-mul_mont_sparse_256\n\n.globl\tsqr_mont_sparse_256\n.hidden\tsqr_mont_sparse_256\n.type\tsqr_mont_sparse_256,\\@function,4,\"unwind\"\n.align\t32\nsqr_mont_sparse_256:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_mont_sparse_256\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t8*0($a_ptr), %rax\n\tmov\t$n_ptr, $n0\n\tmov\t8*1($a_ptr), @acc[5]\n\tmov\t$b_org, $n_ptr\n\tmov\t8*2($a_ptr), @acc[3]\n\tlea\t($a_ptr), $b_ptr\n\tmov\t8*3($a_ptr), $hi\n\n\tmov\t%rax, @acc[6]\n\tmulq\t%rax\t\t\t# a[0]*a[0]\n\tmov\t%rax, @acc[0]\n\tmov\t@acc[6], %rax\n\tmov\t%rdx, @acc[1]\n\tcall\t__mulq_mont_sparse_256\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_mont_sparse_256,.-sqr_mont_sparse_256\n___\n{\nmy 
@acc=@acc;\n$code.=<<___;\n.type\t__mulq_mont_sparse_256,\\@abi-omnipotent\n.align\t32\n__mulq_mont_sparse_256:\n\tmulq\t@acc[5]\t\t\t# a[1]*b[0]\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t@acc[3]\t\t\t# a[2]*b[0]\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[3]\n\n\tmulq\t$hi\t\t\t# a[3]*b[0]\n\tadd\t%rax, @acc[3]\n\t mov\t8($b_ptr), %rax\n\tadc\t\\$0, %rdx\n\txor\t@acc[5], @acc[5]\n\tmov\t%rdx, @acc[4]\n\n___\nfor (my $i=1; $i<4; $i++) {\nmy $b_next = $i<3 ? 8*($i+1).\"($b_ptr)\" : @acc[1];\n$code.=<<___;\n\tmov\t@acc[0], $a0\n\timulq\t$n0, @acc[0]\n\n\t################################# Multiply by b[$i]\n\tmov\t%rax, @acc[6]\n\tmulq\t8*0($a_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*1($a_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($a_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($a_ptr)\n\tadd\t%rax, @acc[4]\n\t mov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[4]\n\tadc\t%rdx, @acc[5]\t\t# can't overflow\n\txor\t@acc[6], @acc[6]\n\n\t################################# reduction\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, $a0\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, $a0\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$a0, @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\tadd\t%rax, @acc[3]\n\t mov\t$b_next, %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tadd\t%rdx, @acc[4]\n\tadc\t\\$0, @acc[5]\n\tadc\t\\$0, @acc[6]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\timulq\t$n0, 
%rax\n\tmov\t8(%rsp), $a_ptr\t\t# restore $r_ptr\n\n\t################################# last reduction\n\tmov\t%rax, @acc[6]\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, @acc[0]\t\t# guaranteed to be zero\n\tmov\t@acc[6], %rax\n\tadc\t%rdx, @acc[0]\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[0], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\t mov\t@acc[2], $b_ptr\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tadd\t%rax, @acc[3]\n\t mov\t@acc[1], %rax\n\tadc\t\\$0, %rdx\n\tadd\t%rdx, @acc[4]\n\tadc\t\\$0, @acc[5]\n\n\t#################################\n\t# Branch-less conditional subtraction of modulus\n\n\t mov\t@acc[3], @acc[0]\n\tsub\t8*0($n_ptr), @acc[1]\n\tsbb\t8*1($n_ptr), @acc[2]\n\tsbb\t8*2($n_ptr), @acc[3]\n\t mov\t@acc[4], $hi\n\tsbb\t8*3($n_ptr), @acc[4]\n\tsbb\t\\$0, @acc[5]\n\n\tcmovc\t%rax, @acc[1]\n\tcmovc\t$b_ptr, @acc[2]\n\tcmovc\t@acc[0], @acc[3]\n\tmov\t@acc[1], 8*0($a_ptr)\n\tcmovc\t$hi, @acc[4]\n\tmov\t@acc[2], 8*1($a_ptr)\n\tmov\t@acc[3], 8*2($a_ptr)\n\tmov\t@acc[4], 8*3($a_ptr)\n\n\tret\n.cfi_endproc\n.size\t__mulq_mont_sparse_256,.-__mulq_mont_sparse_256\n___\n} }\n{ my ($n_ptr, $n0)=($b_ptr, $n_ptr);\t# arguments are \"shifted\"\n\n$code.=<<___;\n.globl\tfrom_mont_256\n.hidden\tfrom_mont_256\n.type\tfrom_mont_256,\\@function,4,\"unwind\"\n.align\t32\nfrom_mont_256:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tfrom_mont_256\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulq_by_1_mont_256\n\n\t#################################\n\t# Branch-less 
conditional acc[0:3] - modulus\n\n\t#mov\t@acc[4], %rax\t\t# __mulq_by_1_mont_256 does it\n\tmov\t@acc[5], @acc[1]\n\tmov\t@acc[6], @acc[2]\n\tmov\t@acc[0], @acc[3]\n\n\tsub\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($n_ptr), @acc[6]\n\tsbb\t8*3($n_ptr), @acc[0]\n\n\tcmovnc\t@acc[4], %rax\n\tcmovnc\t@acc[5], @acc[1]\n\tcmovnc\t@acc[6], @acc[2]\n\tmov\t%rax,    8*0($r_ptr)\n\tcmovnc\t@acc[0], @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tfrom_mont_256,.-from_mont_256\n\n.globl\tredc_mont_256\n.hidden\tredc_mont_256\n.type\tredc_mont_256,\\@function,4,\"unwind\"\n.align\t32\nredc_mont_256:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tredc_mont_256\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulq_by_1_mont_256\n\n\tadd\t8*4($a_ptr), @acc[4]\t# accumulate upper half\n\tadc\t8*5($a_ptr), @acc[5]\n\tmov\t@acc[4], %rax\n\tadc\t8*6($a_ptr), @acc[6]\n\tmov\t@acc[5], @acc[1]\n\tadc\t8*7($a_ptr), @acc[0]\n\tsbb\t$a_ptr, $a_ptr\n\n\t#################################\n\t# Branch-less conditional acc[0:4] - modulus\n\n\tmov\t@acc[6], @acc[2]\n\tsub\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($n_ptr), @acc[6]\n\tmov\t@acc[0], @acc[3]\n\tsbb\t8*3($n_ptr), @acc[0]\n\tsbb\t\\$0, $a_ptr\n\n\tcmovnc\t@acc[4], %rax \n\tcmovnc\t@acc[5], 
@acc[1]\n\tcmovnc\t@acc[6], @acc[2]\n\tmov\t%rax,    8*0($r_ptr)\n\tcmovnc\t@acc[0], @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tredc_mont_256,.-redc_mont_256\n___\n{\nmy @acc=@acc;\n\n$code.=<<___;\n.type\t__mulq_by_1_mont_256,\\@abi-omnipotent\n.align\t32\n__mulq_by_1_mont_256:\n\tmov\t8*0($a_ptr), %rax\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tmov\t%rax, @acc[4]\n\timulq\t$n0, %rax\n\tmov\t%rax, @acc[0]\n___\nfor (my $i=0; $i<4; $i++) {\nmy $hi = @acc[4];\n$code.=<<___;\n\t################################# reduction $i\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, @acc[4]\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, @acc[4]\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[4], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n___\n$code.=<<___\tif ($i<3);\n\t mov\t@acc[1], @acc[5]\n\t imulq\t$n0, @acc[1]\n___\n$code.=<<___;\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[1], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\tret\n.size\t__mulq_by_1_mont_256,.-__mulq_by_1_mont_256\n___\n} } }\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mulq_mont_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.extern\tmul_mont_384x\\$1\n.extern\tsqr_mont_384x\\$1\n.extern\tmul_382x\\$1\n.extern\tsqr_382x\\$1\n.extern\tmul_384\\$1\n.extern\tsqr_384\\$1\n.extern\tredc_mont_384\\$1\n.extern\tfrom_mont_384\\$1\n.extern\tsgn0_pty_mont_384\\$1\n.extern\tsgn0_pty_mont_384x\\$1\n.extern\tmul_mont_384\\$1\n.extern\tsqr_mont_384\\$1\n.extern\tsqr_n_mul_mont_384\\$1\n.extern\tsqr_n_mul_mont_383\\$1\n.extern\tsqr_mont_382x\\$1\n___\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n# common accumulator layout\n@acc=map(\"%r$_\",(8..15));\n\n########################################################################\n{ my @acc=(@acc,\"%rax\",\"%rbx\",\"%rbp\",$a_ptr);\t# all registers are affected\n\t\t\t\t\t\t# except for $n_ptr and $r_ptr\n$code.=<<___;\n.comm\t__blst_platform_cap,4\n.text\n\n########################################################################\n# Double-width subtraction modulo n<<384, as opposite to naively\n# expected modulo n*n. 
It works because n<<384 is the actual\n# input boundary condition for Montgomery reduction, not n*n.\n# Just in case, this is duplicated, but only one module is\n# supposed to be linked...\n.type\t__subq_mod_384x384,\\@abi-omnipotent\n.align\t32\n__subq_mod_384x384:\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t8*6($a_ptr), @acc[6]\n\n\tsub\t8*0($b_org), @acc[0]\n\tmov\t8*7($a_ptr), @acc[7]\n\tsbb\t8*1($b_org), @acc[1]\n\tmov\t8*8($a_ptr), @acc[8]\n\tsbb\t8*2($b_org), @acc[2]\n\tmov\t8*9($a_ptr), @acc[9]\n\tsbb\t8*3($b_org), @acc[3]\n\tmov\t8*10($a_ptr), @acc[10]\n\tsbb\t8*4($b_org), @acc[4]\n\tmov\t8*11($a_ptr), @acc[11]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t@acc[0], 8*0($r_ptr)\n\tsbb\t8*6($b_org), @acc[6]\n\t mov\t8*0($n_ptr), @acc[0]\n\t mov\t@acc[1], 8*1($r_ptr)\n\tsbb\t8*7($b_org), @acc[7]\n\t mov\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[2], 8*2($r_ptr)\n\tsbb\t8*8($b_org), @acc[8]\n\t mov\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], 8*3($r_ptr)\n\tsbb\t8*9($b_org), @acc[9]\n\t mov\t8*3($n_ptr), @acc[3]\n\t mov\t@acc[4], 8*4($r_ptr)\n\tsbb\t8*10($b_org), @acc[10]\n\t mov\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], 8*5($r_ptr)\n\tsbb\t8*11($b_org), @acc[11]\n\t mov\t8*5($n_ptr), @acc[5]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[0]\n\tand\t$b_org, @acc[1]\n\tand\t$b_org, @acc[2]\n\tand\t$b_org, @acc[3]\n\tand\t$b_org, @acc[4]\n\tand\t$b_org, @acc[5]\n\n\tadd\t@acc[0], @acc[6]\n\tadc\t@acc[1], @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t@acc[2], @acc[8]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tadc\t@acc[3], @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t@acc[4], @acc[10]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tadc\t@acc[5], @acc[11]\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tret\n.size\t__subq_mod_384x384,.-__subq_mod_384x384\n\n.type\t__addq_mod_384,\\@abi-omnipotent\n.align\t32\n__addq_mod_384:\n\tmov\t8*0($a_ptr), 
@acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tadd\t8*0($b_org), @acc[0]\n\tadc\t8*1($b_org), @acc[1]\n\tadc\t8*2($b_org), @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t8*3($b_org), @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t8*4($b_org), @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t8*5($b_org), @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t@acc[9],  @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__addq_mod_384,.-__addq_mod_384\n\n.type\t__subq_mod_384,\\@abi-omnipotent\n.align\t32\n__subq_mod_384:\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n__subq_mod_384_a_is_loaded:\n\tsub\t8*0($b_org), @acc[0]\n\t mov\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($b_org), @acc[1]\n\t mov\t8*1($n_ptr), @acc[7]\n\tsbb\t8*2($b_org), @acc[2]\n\t mov\t8*2($n_ptr), @acc[8]\n\tsbb\t8*3($b_org), @acc[3]\n\t mov\t8*3($n_ptr), @acc[9]\n\tsbb\t8*4($b_org), @acc[4]\n\t mov\t8*4($n_ptr), @acc[10]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t8*5($n_ptr), @acc[11]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[6]\n\tand\t$b_org, @acc[7]\n\tand\t$b_org, @acc[8]\n\tand\t$b_org, @acc[9]\n\tand\t$b_org, @acc[10]\n\tand\t$b_org, @acc[11]\n\n\tadd\t@acc[6], @acc[0]\n\tadc\t@acc[7], @acc[1]\n\tmov\t@acc[0], 
8*0($r_ptr)\n\tadc\t@acc[8], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[9], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tadc\t@acc[10], @acc[4]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tadc\t@acc[11], @acc[5]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__subq_mod_384,.-__subq_mod_384\n___\n}\n\n########################################################################\n# \"Complex\" multiplication and squaring. Use vanilla multiplication when\n# possible to fold reductions. I.e. instead of mul_mont, mul_mont\n# followed by add/sub_mod, it calls mul, mul, double-width add/sub_mod\n# followed by *common* reduction...\n{ my $frame = 5*8 +\t# place for argument off-load +\n\t      3*768/8;\t# place for 3 768-bit temporary vectors\n$code.=<<___;\n.globl\tmul_mont_384x\n.hidden\tmul_mont_384x\n.type\tmul_mont_384x,\\@function,5,\"unwind\"\n.align\t32\nmul_mont_384x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tmul_mont_384x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\n\tmov\t$r_ptr, 8*4(%rsp)\t# offload arguments\n\tmov\t$a_ptr, 8*3(%rsp)\n\tmov\t$b_org, 8*2(%rsp)\n\tmov\t$n_ptr, 8*1(%rsp)\n\tmov\t$n0,    8*0(%rsp)\n\n\t################################# mul_384(t0, a->re, b->re);\n\t#lea\t0($b_btr), $b_ptr\t# b->re\n\t#lea\t0($a_ptr), $a_ptr\t# a->re\n\tlea\t40(%rsp), $r_ptr\t# t0\n\tcall\t__mulq_384\n\n\t################################# mul_384(t1, a->im, b->im);\n\tlea\t48($b_ptr), $b_ptr\t# b->im\n\tlea\t48($a_ptr), $a_ptr\t# a->im\n\tlea\t40+96(%rsp), $r_ptr\t# t1\n\tcall\t__mulq_384\n\n\t################################# mul_384(t2, a->re+a->im, b->re+b->im);\n\tmov\t8*1(%rsp), $n_ptr\n\tlea\t-48($a_ptr), $b_org\n\tlea\t40+192+48(%rsp), 
$r_ptr\n\tcall\t__addq_mod_384\n\n\tmov\t8*2(%rsp), $a_ptr\n\tlea\t48($a_ptr), $b_org\n\tlea\t-48($r_ptr), $r_ptr\n\tcall\t__addq_mod_384\n\n\tlea\t($r_ptr),$b_ptr\n\tlea\t48($r_ptr),$a_ptr\n\tcall\t__mulq_384\n\n\t################################# t2=t2-t0-t1\n\tlea\t($r_ptr), $a_ptr\t# t2\n\tlea\t40(%rsp), $b_org\t# t0\n\tmov\t8*1(%rsp), $n_ptr\n\tcall\t__subq_mod_384x384\t# t2=t2-t0\n\n\tlea\t($r_ptr), $a_ptr\t# t2\n\tlea\t-96($r_ptr), $b_org\t# t1\n\tcall\t__subq_mod_384x384\t# t2=t2-t1\n\n\t################################# t0=t0-t1\n\tlea\t40(%rsp), $a_ptr\n\tlea\t40+96(%rsp), $b_org\n\tlea\t40(%rsp), $r_ptr\n\tcall\t__subq_mod_384x384\t# t0-t1\n\n\tmov\t$n_ptr, $b_ptr\t\t# n_ptr for redc_mont_384\n\n\t################################# redc_mont_384(ret->re, t0, mod, n0);\n\tlea\t40(%rsp), $a_ptr\t# t0\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for redc_mont_384\n\tmov\t8*4(%rsp), $r_ptr\t# ret->re\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\t################################# redc_mont_384(ret->im, t2, mod, n0);\n\tlea\t40+192(%rsp), $a_ptr\t# t2\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for redc_mont_384\n\tlea\t48($r_ptr), $r_ptr\t# ret->im\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_mont_384x,.-mul_mont_384x\n___\n}\n{ my $frame = 4*8 +\t# place for argument off-load +\n\t      2*384/8 +\t# place for 2 384-bit temporary vectors\n\t      8;\t# align\n$code.=<<___;\n.globl\tsqr_mont_384x\n.hidden\tsqr_mont_384x\n.type\tsqr_mont_384x,\\@function,4,\"unwind\"\n.align\t32\nsqr_mont_384x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, 
__blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$n_ptr, 8*0(%rsp)\t# n0\n\tmov\t$b_org, $n_ptr\t\t# n_ptr\n\tmov\t$r_ptr, 8*1(%rsp)\t# to __mulq_mont_384\n\tmov\t$a_ptr, 8*2(%rsp)\n\n\t################################# add_mod_384(t0, a->re, a->im);\n\tlea\t48($a_ptr), $b_org\t# a->im\n\tlea\t32(%rsp), $r_ptr\t# t0\n\tcall\t__addq_mod_384\n\n\t################################# sub_mod_384(t1, a->re, a->im);\n\tmov\t8*2(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_org\t# a->im\n\tlea\t32+48(%rsp), $r_ptr\t# t1\n\tcall\t__subq_mod_384\n\n\t################################# mul_mont_384(ret->im, a->re, a->im, mod, n0);\n\tmov\t8*2(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_ptr\t# a->im\n\n\tmov\t48($a_ptr), %rax\t# a->im\n\tmov\t8*0($a_ptr), @acc[6]\t# a->re\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[4]\n\tmov\t8*3($a_ptr), @acc[5]\n\n\tcall\t__mulq_mont_384\n___\n{\nmy @acc = map(\"%r$_\",14,15,8..11,\t# output from __mulq_mont_384\n                     12,13,\"ax\",\"bx\",\"bp\",\"si\");\n$code.=<<___;\n\tadd\t@acc[0], @acc[0]\t# add with itself\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t@acc[3], @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t@acc[4], @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t@acc[5], @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tmov\t@acc[0],  8*6($r_ptr)\t# 
ret->im\n\tcmovc\t@acc[9],  @acc[3]\n\tmov\t@acc[1],  8*7($r_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2],  8*8($r_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3],  8*9($r_ptr)\n\tmov\t@acc[4],  8*10($r_ptr)\n\tmov\t@acc[5],  8*11($r_ptr)\n___\n}\n$code.=<<___;\n\t################################# mul_mont_384(ret->re, t0, t1, mod, n0);\n\tlea\t32(%rsp), $a_ptr\t# t0\n\tlea\t32+48(%rsp), $b_ptr\t# t1\n\n\tmov\t32+48(%rsp), %rax\t# t1[0]\n\tmov\t32+8*0(%rsp), @acc[6]\t# t0[0..3]\n\tmov\t32+8*1(%rsp), @acc[7]\n\tmov\t32+8*2(%rsp), @acc[4]\n\tmov\t32+8*3(%rsp), @acc[5]\n\n\tcall\t__mulq_mont_384\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_mont_384x,.-sqr_mont_384x\n\n.globl\tmul_382x\n.hidden\tmul_382x\n.type\tmul_382x,\\@function,4,\"unwind\"\n.align\t32\nmul_382x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tmul_382x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t96($r_ptr), $r_ptr\t# ret->im\n\tmov\t$a_ptr, 8*0(%rsp)\n\tmov\t$b_org, 8*1(%rsp)\n\tmov\t$r_ptr, 8*2(%rsp)\t# offload ret->im\n\tmov\t$n_ptr, 8*3(%rsp)\n\n\t################################# t0 = a->re + a->im\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tadd\t8*6($a_ptr), @acc[0]\n\tadc\t8*7($a_ptr), @acc[1]\n\tadc\t8*8($a_ptr), 
@acc[2]\n\tadc\t8*9($a_ptr), @acc[3]\n\tadc\t8*10($a_ptr), @acc[4]\n\tadc\t8*11($a_ptr), @acc[5]\n\n\tmov\t@acc[0], 32+8*0(%rsp)\n\tmov\t@acc[1], 32+8*1(%rsp)\n\tmov\t@acc[2], 32+8*2(%rsp)\n\tmov\t@acc[3], 32+8*3(%rsp)\n\tmov\t@acc[4], 32+8*4(%rsp)\n\tmov\t@acc[5], 32+8*5(%rsp)\n\n\t################################# t1 = b->re + b->im\n\tmov\t8*0($b_org), @acc[0]\n\tmov\t8*1($b_org), @acc[1]\n\tmov\t8*2($b_org), @acc[2]\n\tmov\t8*3($b_org), @acc[3]\n\tmov\t8*4($b_org), @acc[4]\n\tmov\t8*5($b_org), @acc[5]\n\n\tadd\t8*6($b_org), @acc[0]\n\tadc\t8*7($b_org), @acc[1]\n\tadc\t8*8($b_org), @acc[2]\n\tadc\t8*9($b_org), @acc[3]\n\tadc\t8*10($b_org), @acc[4]\n\tadc\t8*11($b_org), @acc[5]\n\n\tmov\t@acc[0], 32+8*6(%rsp)\n\tmov\t@acc[1], 32+8*7(%rsp)\n\tmov\t@acc[2], 32+8*8(%rsp)\n\tmov\t@acc[3], 32+8*9(%rsp)\n\tmov\t@acc[4], 32+8*10(%rsp)\n\tmov\t@acc[5], 32+8*11(%rsp)\n\n\t################################# mul_384(ret->im, t0, t1);\n\tlea\t32+8*0(%rsp), $a_ptr\t# t0\n\tlea\t32+8*6(%rsp), $b_ptr\t# t1\n\tcall\t__mulq_384\n\n\t################################# mul_384(ret->re, a->re, b->re);\n\tmov\t8*0(%rsp), $a_ptr\n\tmov\t8*1(%rsp), $b_ptr\n\tlea\t-96($r_ptr), $r_ptr\t# ret->re\n\tcall\t__mulq_384\n\n\t################################# mul_384(tx, a->im, b->im);\n\tlea\t48($a_ptr), $a_ptr\n\tlea\t48($b_ptr), $b_ptr\n\tlea\t32(%rsp), $r_ptr\n\tcall\t__mulq_384\n\n\t################################# ret->im -= tx\n\tmov\t8*2(%rsp), $a_ptr\t# restore ret->im\n\tlea\t32(%rsp), $b_org\n\tmov\t8*3(%rsp), $n_ptr\n\tmov\t$a_ptr, $r_ptr\n\tcall\t__subq_mod_384x384\n\n\t################################# ret->im -= ret->re\n\tlea\t0($r_ptr), $a_ptr\n\tlea\t-96($r_ptr), $b_org\n\tcall\t__subq_mod_384x384\n\n\t################################# ret->re -= tx\n\tlea\t-96($r_ptr), $a_ptr\n\tlea\t32(%rsp), $b_org\n\tlea\t-96($r_ptr), $r_ptr\n\tcall\t__subq_mod_384x384\n\n\tlea\t$frame(%rsp), %r8\t# size 
optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_382x,.-mul_382x\n___\n}\n{ my @acc=(@acc,\"%rax\",\"%rbx\",\"%rbp\",$b_org);\t# all registers are affected\n\t\t\t\t\t\t# except for $n_ptr and $r_ptr\n$code.=<<___;\n.globl\tsqr_382x\n.hidden\tsqr_382x\n.type\tsqr_382x,\\@function,3,\"unwind\"\n.align\t32\nsqr_382x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_382x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$a_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\n\t################################# t0 = a->re + a->im\n\tmov\t8*0($a_ptr), @acc[6]\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[9]\n\tmov\t8*4($a_ptr), @acc[10]\n\tmov\t8*5($a_ptr), @acc[11]\n\n\tmov\t@acc[6], @acc[0]\n\tadd\t8*6($a_ptr), @acc[6]\n\tmov\t@acc[7], @acc[1]\n\tadc\t8*7($a_ptr), @acc[7]\n\tmov\t@acc[8], @acc[2]\n\tadc\t8*8($a_ptr), @acc[8]\n\tmov\t@acc[9], @acc[3]\n\tadc\t8*9($a_ptr), @acc[9]\n\tmov\t@acc[10], @acc[4]\n\tadc\t8*10($a_ptr), @acc[10]\n\tmov\t@acc[11], @acc[5]\n\tadc\t8*11($a_ptr), @acc[11]\n\n\tmov\t@acc[6], 8*0($r_ptr)\n\tmov\t@acc[7], 8*1($r_ptr)\n\tmov\t@acc[8], 8*2($r_ptr)\n\tmov\t@acc[9], 8*3($r_ptr)\n\tmov\t@acc[10], 8*4($r_ptr)\n\tmov\t@acc[11], 8*5($r_ptr)\n\n\t################################# t1 = a->re - a->im\n\tlea\t48($a_ptr), $b_org\n\tlea\t48($r_ptr), $r_ptr\n\tcall\t__subq_mod_384_a_is_loaded\n\n\t################################# mul_384(ret->re, t0, 
t1);\n\tlea\t($r_ptr), $a_ptr\n\tlea\t-48($r_ptr), $b_ptr\n\tlea\t-48($r_ptr), $r_ptr\n\tcall\t__mulq_384\n\n\t################################# mul_384(ret->im, a->re, a->im);\n\tmov\t(%rsp), $a_ptr\n\tlea\t48($a_ptr), $b_ptr\n\tlea\t96($r_ptr), $r_ptr\n\tcall\t__mulq_384\n\n\tmov\t8*0($r_ptr), @acc[0]\t# double ret->im\n\tmov\t8*1($r_ptr), @acc[1]\n\tmov\t8*2($r_ptr), @acc[2]\n\tmov\t8*3($r_ptr), @acc[3]\n\tmov\t8*4($r_ptr), @acc[4]\n\tmov\t8*5($r_ptr), @acc[5]\n\tmov\t8*6($r_ptr), @acc[6]\n\tmov\t8*7($r_ptr), @acc[7]\n\tmov\t8*8($r_ptr), @acc[8]\n\tmov\t8*9($r_ptr), @acc[9]\n\tmov\t8*10($r_ptr), @acc[10]\n\tadd\t@acc[0], @acc[0]\n\tmov\t8*11($r_ptr), @acc[11]\n\tadc\t@acc[1], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[2], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[3], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tadc\t@acc[4], @acc[4]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tadc\t@acc[5], @acc[5]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tadc\t@acc[6], @acc[6]\n\tmov\t@acc[5], 8*5($r_ptr)\n\tadc\t@acc[7], @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t@acc[8], @acc[8]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tadc\t@acc[9], @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t@acc[10], @acc[10]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tadc\t@acc[11], @acc[11]\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tmov\t8*1(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*2(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*3(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*4(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*5(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*6(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*7(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*7\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_382x,.-sqr_382x\n___\n}\n{ ########################################################## 384-bit mul\nmy @acc=map(\"%r$_\",(\"cx\",8..12));\nmy $bi = \"%rbp\";\n\n$code.=<<___;\n.globl\tmul_384\n.hidden\tmul_384\n.type\tmul_384,\\@function,3,\"unwind\"\n.align\t32\nmul_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, 
__blst_platform_cap(%rip)\n\tjnz\tmul_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\n\tcall\t__mulq_384\n\n\tmov\t0(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t16(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t24(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-24\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_384,.-mul_384\n\n.type\t__mulq_384,\\@abi-omnipotent\n.align\t32\n__mulq_384:\n\tmov\t8*0($b_ptr), %rax\n\n\tmov\t%rax, $bi\n\tmulq\t8*0($a_ptr)\n\tmov\t%rax, 8*0($r_ptr)\n\tmov\t$bi, %rax\n\tmov\t%rdx, @acc[0]\n\n\tmulq\t8*1($a_ptr)\n\tadd\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[1]\n\n\tmulq\t8*2($a_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t8*3($a_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[3]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t8*1($b_ptr), %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[5]\n___\nfor(my $i=1; $i<6; $i++) {\nmy $b_next = $i<5 ? 
8*($i+1).\"($b_ptr)\" : \"%rax\";\n$code.=<<___;\n\tmov\t%rax, $bi\n\tmulq\t8*0($a_ptr)\n\tadd\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t@acc[0], 8*$i($r_ptr)\n\tmov\t%rdx, @acc[0]\n\n\tmulq\t8*1($a_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[1], @acc[0]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[1]\n\n\tmulq\t8*2($a_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[2], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t8*3($a_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[3], @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[3]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[4], @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t$b_next, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[5], @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[5]\n___\n}\n$code.=<<___;\n\tmov\t@acc[0], 8*6($r_ptr)\n\tmov\t@acc[1], 8*7($r_ptr)\n\tmov\t@acc[2], 8*8($r_ptr)\n\tmov\t@acc[3], 8*9($r_ptr)\n\tmov\t@acc[4], 8*10($r_ptr)\n\tmov\t@acc[5], 8*11($r_ptr)\n\n\tret\n.size\t__mulq_384,.-__mulq_384\n___\n}\nif (0) { ##############################################################\nmy @b=map(\"%r$_\",(10..15));\nmy @a=reverse(@b);\n   @b[5]=$b_ptr;\nmy $bi = \"%rbp\";\nmy @comba=map(\"%r$_\",(\"cx\",8,9));\n#                                                   a[0]*b[0]\n#                                              a[1]*b[0]\n#                                              a[0]*b[1]\n#                                         a[2]*b[0]\n#                                         a[1]*b[1]\n#                                         a[0]*b[2]\n#                                    a[3]*b[0]\n#                                    a[2]*b[1]\n#                                    a[1]*b[2]\n#                                    a[0]*b[3]\n#                               a[4]*b[0]\n#                               
a[3]*b[1]\n#                               a[2]*b[2]\n#                               a[1]*b[3]\n#                               a[0]*b[4]\n#                          a[5]*b[0]\n#                          a[4]*b[1]\n#                          a[3]*b[2]\n#                          a[2]*b[3]\n#                          a[1]*b[4]\n#                          a[0]*b[5]\n#                     a[5]*b[1]\n#                     a[4]*b[2]\n#                     a[3]*b[3]\n#                     a[2]*b[4]\n#                     a[1]*b[5]\n#                a[5]*b[2]\n#                a[4]*b[3]\n#                a[3]*b[4]\n#                a[2]*b[5]\n#           a[5]*b[3]\n#           a[4]*b[4]\n#           a[3]*b[5]\n#      a[5]*b[4]\n#      a[4]*b[5]\n# a[5]*b[5]\n#\n# 13% less instructions give +15% on Core2, +10% on Goldmont,\n# -0% on Sandy Bridge, but -16% on Haswell:-(\n# [for reference +5% on Skylake, +11% on Ryzen]\n\n$code.=<<___;\n.type\t__mulq_comba_384,\\@abi-omnipotent\n.align\t32\n__mulq_comba_384:\n\tmov\t8*0($b_ptr), %rax\n\tmov\t8*0($a_ptr), @a[0]\n\tmov\t8*1($a_ptr), @a[1]\n\tmov\t8*1($b_ptr), @b[1]\n\n\tmov\t%rax, @b[0]\n\tmulq\t@a[0]\t\t\t# a[0]*b[0]\n\tmov\t%rax, 8*0($r_ptr)\n\tmov\t@b[0], %rax\n\tmov\t%rdx, @comba[0]\n\n\t#################################\n\tmov\t8*2($a_ptr), @a[2]\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[1]\t\t\t# a[1]*b[0]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t\\$0, %rdx\n\tmov\t8*2($b_ptr), @b[2]\n\tmov\t%rdx, @comba[1]\n\n\tmulq\t@a[0]\t\t\t# a[0]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[0], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*1($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[2]\t\t\t# a[2]*b[0]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[1]\t\t\t# a[1]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[0]\t\t\t# 
a[0]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[0], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*2($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t8*3($a_ptr)\t\t# a[3]*b[0]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[2]\t\t\t# a[2]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[1]\t\t\t# a[1]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t8*3($b_ptr), %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmov\t%rax, @b[3]\n\tmulq\t@a[0]\t\t\t# a[0]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[0], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*3($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t8*4($a_ptr)\t\t# a[4]*b[0]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*3($a_ptr)\t\t# a[3]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*2($a_ptr)\t\t# a[2]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[3], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[1]\t\t\t# a[1]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t8*4($b_ptr), %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmov\t%rax, @b[4]\n\tmulq\t@a[0]\t\t\t# a[0]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[0], %rax\n\tadc\t%rdx, @comba[1]\n\tmov\t8*5($a_ptr), @a[5]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*4($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[5]\t\t\t# a[5]*b[0]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*4($a_ptr)\t\t# a[4]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*3($a_ptr)\t\t# a[3]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[3], 
%rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*2($a_ptr)\t\t# a[2]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[4], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*1($a_ptr)\t\t# a[1]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t8*5($b_ptr), %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmov\t%rax, @b[5]\n\tmulq\t@a[0]\t\t\t# a[0]*b[5]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[1], %rax\n\tadc\t%rdx, @comba[1]\n\tmov\t8*4($a_ptr), @a[4]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*5($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[5]\t\t\t# a[5]*b[1]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[4]\t\t\t# a[4]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[3], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*3($a_ptr)\t\t# a[3]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[4], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*2($a_ptr)\t\t# a[2]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[5], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*1($a_ptr)\t\t# a[1]*b[5]\n\tadd\t%rax, @comba[0]\n\tmov\t$b[2], %rax\n\tadc\t%rdx, @comba[1]\n\tmov\t8*3($a_ptr), @a[3]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*6($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[5]\t\t\t# a[5]*b[2]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[3], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[4]\t\t\t# a[4]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[4], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[3]\t\t\t# a[3]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[5], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t8*2($a_ptr)\t\t# a[2]*b[5]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[3], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*7($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], 
@comba[2]\n\tmulq\t@a[5]\t\t\t# a[5]*b[3]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[4], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[4]\t\t\t# a[4]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[5], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[3]\t\t\t# a[3]*b[5]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[4], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*8($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\txor\t@comba[2], @comba[2]\n\tmulq\t@a[5]\t\t\t# a[5]*b[4]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[5], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\n\tmulq\t@a[4]\t\t\t# a[4]*b[5]\n\tadd\t%rax, @comba[0]\n\tmov\t@b[5], %rax\n\tadc\t%rdx, @comba[1]\n\tadc\t\\$0, @comba[2]\n\tmov\t@comba[0], 8*9($r_ptr)\n___\n    push(@comba,shift(@comba));\n$code.=<<___;\n\tmulq\t@a[5]\t\t\t# a[5]*b[5]\n\tadd\t%rax, @comba[0]\n\tadc\t%rdx, @comba[1]\n\n\tmov\t@comba[0], 8*10($r_ptr)\n\tmov\t@comba[1], 8*11($r_ptr)\n\n\tret\n.size\t__mulq_comba_384,.-__mulq_comba_384\n___\n}\n{ ########################################################## 384-bit sqr\nmy @acc=(@acc,\"%rcx\",\"%rbx\",\"%rbp\",$a_ptr);\nmy $hi;\n\n$code.=<<___;\n.globl\tsqr_384\n.hidden\tsqr_384\n.type\tsqr_384,\\@function,2,\"unwind\"\n.align\t32\nsqr_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, 
%rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tcall\t__sqrq_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_384,.-sqr_384\n\n.type\t__sqrq_384,\\@abi-omnipotent\n.align\t32\n__sqrq_384:\n\tmov\t8*0($a_ptr), %rax\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[9]\n\n\t#########################################\n\tmov\t%rax, @acc[6]\n\tmulq\t@acc[7]\t\t\t\t# a[1]*a[0]\n\tmov\t%rax, @acc[1]\n\tmov\t@acc[6], %rax\n\t mov\t8*4($a_ptr), @acc[10]\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t@acc[8]\t\t\t\t# a[2]*a[0]\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\t mov\t8*5($a_ptr), @acc[11]\n\tmov\t%rdx, @acc[3]\n\n\tmulq\t@acc[9]\t\t\t\t# a[3]*a[0]\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n\n\tmulq\t@acc[10]\t\t\t# a[4]*a[0]\n\tadd\t%rax, @acc[4]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[5]\n\n\tmulq\t@acc[11]\t\t\t# a[5]*a[0]\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[6], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\tmulq\t%rax\t\t\t\t# a[0]*a[0]\n\txor\t@acc[0], @acc[0]\n\tmov\t%rax, 8*0($r_ptr)\n\t mov\t@acc[7], %rax\n\tadd\t@acc[1], @acc[1]\t\t# double acc[1]\n\tadc\t\\$0, @acc[0]\n\tadd\t%rdx, @acc[1]\t\t\t# accumulate a[0]*a[0]\n\tadc\t\\$0, @acc[0]\t\t\t# carries to a[1]*a[1]\n\tmov\t@acc[1], 8*1($r_ptr)\n___\n$hi=@acc[1];\n$code.=<<___;\n\t#########################################\n\tmulq\t@acc[8]\t\t\t\t# a[2]*a[1]\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[7], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t@acc[9]\t\t\t\t# a[3]*a[1]\n\tadd\t%rax, @acc[4]\n\tmov\t@acc[7], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, 
$hi\n\n\tmulq\t@acc[10]\t\t\t# a[4]*a[1]\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[7], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[5]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t@acc[11]\t\t\t# a[5]*a[1]\n\tadd\t%rax, @acc[6]\n\tmov\t@acc[7], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[6]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t%rax\t\t\t\t# a[1]*a[1]\n\txor\t@acc[1], @acc[1]\n\tadd\t%rax, @acc[0]\t\t\t# can't carry\n\t mov\t@acc[8], %rax\n\tadd\t@acc[2], @acc[2]\t\t# double acc[2:3]\n\tadc\t@acc[3], @acc[3]\n\tadc\t\\$0, @acc[1]\n\tadd\t@acc[0], @acc[2]\t\t# accumulate a[1]*a[1]\n\tadc\t%rdx, @acc[3]\n\tadc\t\\$0, @acc[1]\t\t\t# carries to a[2]*a[2]\n\tmov\t@acc[2], 8*2($r_ptr)\n___\n$hi=@acc[0];\n$code.=<<___;\n\t#########################################\n\tmulq\t@acc[9]\t\t\t\t# a[3]*a[2]\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[8], %rax\n\tadc\t\\$0, %rdx\n\t mov\t@acc[3], 8*3($r_ptr)\n\tmov\t%rdx, $hi\n\n\tmulq\t@acc[10]\t\t\t# a[4]*a[2]\n\tadd\t%rax, @acc[6]\n\tmov\t@acc[8], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[6]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t@acc[11]\t\t\t# a[5]*a[2]\n\tadd\t%rax, @acc[7]\n\tmov\t@acc[8], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[7]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[8]\n\n\tmulq\t%rax\t\t\t\t# a[2]*a[2]\n\txor\t@acc[3], @acc[3]\n\tadd\t%rax, @acc[1]\t\t\t# can't carry\n\t mov\t@acc[9], %rax\n\tadd\t@acc[4], @acc[4]\t\t# double acc[4:5]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, @acc[3]\n\tadd\t@acc[1], @acc[4]\t\t# accumulate a[2]*a[2]\n\tadc\t%rdx, @acc[5]\n\tadc\t\\$0, @acc[3]\t\t\t# carries to a[3]*a[3]\n\tmov\t@acc[4], 8*4($r_ptr)\n\n\t#########################################\n\tmulq\t@acc[10]\t\t\t# a[4]*a[3]\n\tadd\t%rax, @acc[7]\n\tmov\t@acc[9], %rax\n\tadc\t\\$0, %rdx\n\t mov\t@acc[5], 8*5($r_ptr)\n\tmov\t%rdx, $hi\n\n\tmulq\t@acc[11]\t\t\t# a[5]*a[3]\n\tadd\t%rax, @acc[8]\n\tmov\t@acc[9], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[8]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[9]\n\n\tmulq\t%rax\t\t\t\t# 
a[3]*a[3]\n\txor\t@acc[4], @acc[4]\n\tadd\t%rax, @acc[3]\t\t\t# can't carry\n\t mov\t@acc[10], %rax\n\tadd\t@acc[6], @acc[6]\t\t# double acc[6:7]\n\tadc\t@acc[7], @acc[7]\n\tadc\t\\$0, @acc[4]\n\tadd\t@acc[3], @acc[6]\t\t# accumulate a[3]*a[3]\n\tadc\t%rdx, @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t\\$0, @acc[4]\t\t\t# carries to a[4]*a[4]\n\tmov\t@acc[7], 8*7($r_ptr)\n\n\t#########################################\n\tmulq\t@acc[11]\t\t\t# a[5]*a[4]\n\tadd\t%rax, @acc[9]\n\tmov\t@acc[10], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[10]\n\n\tmulq\t%rax\t\t\t\t# a[4]*a[4]\n\txor\t@acc[5], @acc[5]\n\tadd\t%rax, @acc[4]\t\t\t# can't carry\n\t mov\t@acc[11], %rax\n\tadd\t@acc[8], @acc[8]\t\t# double acc[8:9]\n\tadc\t@acc[9], @acc[9]\n\tadc\t\\$0, @acc[5]\n\tadd\t@acc[4], @acc[8]\t\t# accumulate a[4]*a[4]\n\tadc\t%rdx, @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t\\$0, @acc[5]\t\t\t# carries to a[5]*a[5]\n\tmov\t@acc[9], 8*9($r_ptr)\n\n\t#########################################\n\tmulq\t%rax\t\t\t\t# a[5]*a[5]\n\tadd\t@acc[5], %rax\t\t\t# can't carry\n\tadd\t@acc[10], @acc[10]\t\t# double acc[10]\n\tadc\t\\$0, %rdx\n\tadd\t@acc[10], %rax\t\t\t# accumulate a[5]*a[5]\n\tadc\t\\$0, %rdx\n\tmov\t%rax, 8*10($r_ptr)\n\tmov\t%rdx, 8*11($r_ptr)\n\n\tret\n.size\t__sqrq_384,.-__sqrq_384\n\n.globl\tsqr_mont_384\n.hidden\tsqr_mont_384\n.type\tsqr_mont_384,\\@function,4,\"unwind\"\n.align\t32\nsqr_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8*15, %rsp\n.cfi_adjust_cfa_offset\t8*15\n.cfi_end_prologue\n\n\tmov\t$n_ptr, 8*12(%rsp)\t# n0\n\tmov\t$b_org, 8*13(%rsp)\t# n_ptr\n\tmov\t$r_ptr, 8*14(%rsp)\n\n\tmov\t%rsp, $r_ptr\n\tcall\t__sqrq_384\n\n\tlea\t0(%rsp), $a_ptr\n\tmov\t8*12(%rsp), %rcx\t# n0 for 
mul_by_1\n\tmov\t8*13(%rsp), $b_ptr\t# n_ptr for mul_by_1\n\tmov\t8*14(%rsp), $r_ptr\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tlea\t8*15(%rsp), %r8\t\t# size optimization\n\tmov\t8*15(%rsp), %r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8), %r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8), %r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8), %r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8), %rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8), %rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8), %rsp\n.cfi_adjust_cfa_offset\t-8*21\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_mont_384,.-sqr_mont_384\n___\n}\n{ ########################################################## 384-bit redc_mont\nmy ($n_ptr, $n0)=($b_ptr, $n_ptr);\t# arguments are \"shifted\"\n\n$code.=<<___;\n########################################################################\n# void redc_mont_384(uint64_t ret[6], const uint64_t a[12],\n#                    uint64_t m[6], uint64_t n0);\n.globl\tredc_mont_384\n.hidden\tredc_mont_384\n.type\tredc_mont_384,\\@function,4,\"unwind\"\n.align\t32\nredc_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tredc_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tredc_mont_384,.-redc_mont_384\n\n########################################################################\n# void from_mont_384(uint64_t 
ret[6], const uint64_t a[6],\n#                    uint64_t m[6], uint64_t n0);\n.globl\tfrom_mont_384\n.hidden\tfrom_mont_384\n.type\tfrom_mont_384,\\@function,4,\"unwind\"\n.align\t32\nfrom_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tfrom_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulq_by_1_mont_384\n\n\t#################################\n\t# Branch-less conditional acc[0:6] - modulus\n\n\t#mov\t@acc[6], %rax\t\t# __mulq_by_1_mont_384 does it\n\tmov\t@acc[7], %rcx\n\tmov\t@acc[0], %rdx\n\tmov\t@acc[1], %rbp\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[7]\n\tmov\t@acc[2], @acc[5]\n\tsbb\t8*2($n_ptr), @acc[0]\n\tsbb\t8*3($n_ptr), @acc[1]\n\tsbb\t8*4($n_ptr), @acc[2]\n\tmov\t@acc[3], $a_ptr\n\tsbb\t8*5($n_ptr), @acc[3]\n\n\tcmovc\t%rax, @acc[6]\n\tcmovc\t%rcx, @acc[7]\n\tcmovc\t%rdx, @acc[0]\n\tmov\t@acc[6], 8*0($r_ptr)\n\tcmovc\t%rbp, @acc[1]\n\tmov\t@acc[7], 8*1($r_ptr)\n\tcmovc\t@acc[5], @acc[2]\n\tmov\t@acc[0], 8*2($r_ptr)\n\tcmovc\t$a_ptr,  @acc[3]\n\tmov\t@acc[1], 8*3($r_ptr)\n\tmov\t@acc[2], 8*4($r_ptr)\n\tmov\t@acc[3], 8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tfrom_mont_384,.-from_mont_384\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n\n$code.=<<___;\n.type\t__mulq_by_1_mont_384,\\@abi-omnipotent\n.align\t32\n__mulq_by_1_mont_384:\n\tmov\t8*0($a_ptr), %rax\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), 
@acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tmov\t%rax, @acc[6]\n\timulq\t$n0, %rax\n\tmov\t%rax, @acc[0]\n___\nfor (my $i=0; $i<6; $i++) {\nmy $hi = @acc[6];\n$code.=<<___;\n\t################################# reduction $i\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, @acc[6]\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, @acc[6]\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[6], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n___\n$code.=<<___\tif ($i<5);\n\t mov\t@acc[1], @acc[7]\n\t imulq\t$n0, @acc[1]\n___\n$code.=<<___;\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*4($n_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*5($n_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[1], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[5]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\tret\n.size\t__mulq_by_1_mont_384,.-__mulq_by_1_mont_384\n\n.type\t__redq_tail_mont_384,\\@abi-omnipotent\n.align\t32\n__redq_tail_mont_384:\n\tadd\t8*6($a_ptr), @acc[0]\t# accumulate upper half\n\tmov\t@acc[0], %rax\n\tadc\t8*7($a_ptr), @acc[1]\n\tadc\t8*8($a_ptr), @acc[2]\n\tadc\t8*9($a_ptr), @acc[3]\n\tmov\t@acc[1], %rcx\n\tadc\t8*10($a_ptr), @acc[4]\n\tadc\t8*11($a_ptr), @acc[5]\n\tsbb\t@acc[6], @acc[6]\n\n\t#################################\n\t# Branch-less conditional acc[0:6] - modulus\n\n\tmov\t@acc[2], %rdx\n\tmov\t@acc[3], %rbp\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tmov\t@acc[4], @acc[7]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), 
@acc[4]\n\tmov\t@acc[5], $a_ptr\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, @acc[6]\n\n\tcmovc\t%rax, @acc[0]\n\tcmovc\t%rcx, @acc[1]\n\tcmovc\t%rdx, @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t%rbp, @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[7], @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t$a_ptr,  @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__redq_tail_mont_384,.-__redq_tail_mont_384\n\n.globl\tsgn0_pty_mont_384\n.hidden\tsgn0_pty_mont_384\n.type\tsgn0_pty_mont_384,\\@function,3,\"unwind\"\n.align\t32\nsgn0_pty_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$a_ptr, $n_ptr\n\tlea\t0($r_ptr), $a_ptr\n\tmov\t$b_org, $n0\n\tcall\t__mulq_by_1_mont_384\n\n\txor\t%rax, %rax\n\tmov\t@acc[0], @acc[7]\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, %rax\n\tor\t@acc[7], %rax\t\t# pack sign and 
parity\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0_pty_mont_384,.-sgn0_pty_mont_384\n\n.globl\tsgn0_pty_mont_384x\n.hidden\tsgn0_pty_mont_384x\n.type\tsgn0_pty_mont_384x,\\@function,3,\"unwind\"\n.align\t32\nsgn0_pty_mont_384x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsgn0_pty_mont_384x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$a_ptr, $n_ptr\n\tlea\t48($r_ptr), $a_ptr\t# sgn0(a->im)\n\tmov\t$b_org, $n0\n\tcall\t__mulq_by_1_mont_384\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], @acc[0]\n\tor\t@acc[5], @acc[0]\n\n\tlea\t0($r_ptr), $a_ptr\t# sgn0(a->re)\n\txor\t$r_ptr, $r_ptr\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, $r_ptr\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $r_ptr\n\n\tmov\t@acc[0], 0(%rsp)\t# a->im is zero or not\n\tnot\t$r_ptr\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, $r_ptr\n\tor\t@acc[7], $r_ptr\t\t# pack sign and parity\n\n\tcall\t__mulq_by_1_mont_384\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], 
@acc[0]\n\tor\t@acc[5], @acc[0]\n\n\txor\t%rax, %rax\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tmov\t0(%rsp), @acc[6]\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\n\ttest\t@acc[0], @acc[0]\n\tcmovz\t$r_ptr, @acc[7]\t\t# a->re==0? prty(a->im) : prty(a->re)\n\n\ttest\t@acc[6], @acc[6]\n\tcmovnz\t$r_ptr, %rax\t\t# a->im!=0? sgn0(a->im) : sgn0(a->re)\n\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, %rax\n\tor\t@acc[7], %rax\t\t# pack sign and parity\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0_pty_mont_384x,.-sgn0_pty_mont_384x\n___\n} }\n\n{ ########################################################## mulq_mont\nmy ($bi, $hi) = (\"%rdi\", \"%rbp\");\n\n$code.=<<___;\n.globl\tmul_mont_384\n.hidden\tmul_mont_384\n.type\tmul_mont_384,\\@function,5,\"unwind\"\n.align\t32\nmul_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tmul_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8*3, %rsp\n.cfi_adjust_cfa_offset\t8*3\n.cfi_end_prologue\n\n\tmov\t8*0($b_org), %rax\n\tmov\t8*0($a_ptr), @acc[6]\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[4]\n\tmov\t8*3($a_ptr), @acc[5]\n\tmov\t$b_org, $b_ptr\t\t# 
evacuate from %rdx\n\tmov\t$n0,    8*0(%rsp)\n\tmov\t$r_ptr, 8*1(%rsp)\t# to __mulq_mont_384\n\n\tcall\t__mulq_mont_384\n\n\tmov\t24(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t32(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t40(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t48(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t56(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t64(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t72(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-72\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmul_mont_384,.-mul_mont_384\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n\n$code.=<<___;\n.type\t__mulq_mont_384,\\@abi-omnipotent\n.align\t32\n__mulq_mont_384:\n\tmov\t%rax, $bi\n\tmulq\t@acc[6]\t\t\t# a[0]*b[0]\n\tmov\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tmov\t%rdx, @acc[1]\n\n\tmulq\t@acc[7]\t\t\t# a[1]*b[0]\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t@acc[4]\t\t\t# a[2]*b[0]\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[3]\n\n\t mov\t@acc[0], $hi\n\t imulq\t8(%rsp), @acc[0]\n\n\tmulq\t@acc[5]\t\t\t# a[3]*b[0]\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[5]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\txor\t@acc[7], @acc[7]\n\tmov\t%rdx, @acc[6]\n___\nfor (my $i=0; $i<6;) {\nmy $b_next = $i<5 ? 
8*($i+1).\"($b_ptr)\" : @acc[1];\n$code.=<<___;\n\t################################# reduction $i\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, $hi\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, $hi\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*4($n_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*5($n_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t$b_next, %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[5]\n\tadc\t%rdx, @acc[6]\n\tadc\t\\$0, @acc[7]\n___\n    push(@acc,shift(@acc));\n$code.=<<___\tif ($i++<5);\n\t################################# Multiply by b[$i]\n\tmov\t%rax, $bi\n\tmulq\t8*0($a_ptr)\n\tadd\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*1($a_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*2($a_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\t mov\t@acc[0], $hi\n\t imulq\t8(%rsp), @acc[0]\n\n\tmulq\t8*3($a_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t@acc[7], @acc[5]\n\tadc\t\\$0, %rdx\n\txor\t@acc[7], @acc[7]\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, @acc[6]\n\tadc\t\\$0, 
@acc[7]\n___\n}\n$code.=<<___;\n\t#################################\n\t# Branch-less conditional acc[0:6] - modulus\n\n\t#mov\t@acc[0], %rax\n\tmov\t8*2(%rsp), $r_ptr\t# restore $r_ptr\n\tsub\t8*0($n_ptr), @acc[0]\n\tmov\t@acc[1], %rdx\n\tsbb\t8*1($n_ptr), @acc[1]\n\tmov\t@acc[2], $b_ptr\n\tsbb\t8*2($n_ptr), @acc[2]\n\tmov\t@acc[3], $a_ptr\n\tsbb\t8*3($n_ptr), @acc[3]\n\tmov\t@acc[4], $hi\n\tsbb\t8*4($n_ptr), @acc[4]\n\tmov\t@acc[5], @acc[7]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, @acc[6]\n\n\tcmovc\t%rax,    @acc[0]\n\tcmovc\t%rdx,    @acc[1]\n\tcmovc\t$b_ptr,  @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t$a_ptr,  @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t$hi,     @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t@acc[7], @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__mulq_mont_384,.-__mulq_mont_384\n___\n} }\n$code.=<<___;\n.globl\tsqr_n_mul_mont_384\n.hidden\tsqr_n_mul_mont_384\n.type\tsqr_n_mul_mont_384,\\@function,6,\"unwind\"\n.align\t32\nsqr_n_mul_mont_384:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_384\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8*17, %rsp\n.cfi_adjust_cfa_offset\t8*17\n.cfi_end_prologue\n\n\tmov\t$n0,    8*0(%rsp)\n\tmov\t$r_ptr, 8*1(%rsp)\t# to __mulq_mont_384\n\tmov\t$n_ptr, 8*2(%rsp)\n\tlea\t8*4(%rsp), $r_ptr\n\tmov\t%r9, 8*3(%rsp)\t\t# 6th, multiplicand argument\n\tmovq\t(%r9), %xmm2\t\t# prefetch b[0]\n\n.Loop_sqr_384:\n\tmovd\t%edx, %xmm1\t\t# loop counter\n\n\tcall\t__sqrq_384\n\n\tlea\t0($r_ptr), $a_ptr\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for mul_by_1\n\tmov\t8*2(%rsp), $b_ptr\t# n_ptr for mul_by_1\n\tcall\t__mulq_by_1_mont_384\n\tcall\t__redq_tail_mont_384\n\n\tmovd\t%xmm1, %edx\n\tlea\t0($r_ptr), 
$a_ptr\n\tdec\t%edx\n\tjnz\t.Loop_sqr_384\n\n\tmovq\t%xmm2, %rax\t\t# b[0]\n\tmov\t$b_ptr, $n_ptr\n\tmov\t8*3(%rsp), $b_ptr\t# 6th, multiplicand argument\n\n\t#mov\t8*0($b_ptr), %rax\n\t#mov\t8*0($a_ptr), @acc[6]\n\t#mov\t8*1($a_ptr), @acc[7]\n\t#mov\t8*2($a_ptr), @acc[4]\n\t#mov\t8*3($a_ptr), @acc[5]\n\tmov\t@acc[0], @acc[4]\n\tmov\t@acc[1], @acc[5]\n\n\tcall\t__mulq_mont_384\n\n\tlea\t8*17(%rsp), %r8\t\t# size optimization\n\tmov\t8*17(%rsp), %r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8), %r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8), %r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8), %r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8), %rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8), %rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8), %rsp\n.cfi_adjust_cfa_offset\t-8*23\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_n_mul_mont_384,.-sqr_n_mul_mont_384\n\n.globl\tsqr_n_mul_mont_383\n.hidden\tsqr_n_mul_mont_383\n.type\tsqr_n_mul_mont_383,\\@function,6,\"unwind\"\n.align\t32\nsqr_n_mul_mont_383:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_n_mul_mont_383\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8*17, %rsp\n.cfi_adjust_cfa_offset\t8*17\n.cfi_end_prologue\n\n\tmov\t$n0, 8*0(%rsp)\n\tmov\t$r_ptr, 8*1(%rsp)\t# to __mulq_mont_384\n\tmov\t$n_ptr, 8*2(%rsp)\n\tlea\t8*4(%rsp), $r_ptr\n\tmov\t%r9, 8*3(%rsp)\t\t# 6th, multiplicand argument\n\tmovq\t(%r9), %xmm2\t\t# prefetch b[0]\n\n.Loop_sqr_383:\n\tmovd\t%edx, %xmm1\t\t# loop counter\n\n\tcall\t__sqrq_384\n\n\tlea\t0($r_ptr), $a_ptr\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for mul_by_1\n\tmov\t8*2(%rsp), $b_ptr\t# n_ptr for mul_by_1\n\tcall\t__mulq_by_1_mont_384\n\n\tmovd\t%xmm1, %edx\t\t# loop counter\n        add     8*6($a_ptr), @acc[6]\t# just accumulate upper half\n        adc     8*7($a_ptr), @acc[7]\n        adc     8*8($a_ptr), @acc[0]\n        adc 
    8*9($a_ptr), @acc[1]\n        adc     8*10($a_ptr), @acc[2]\n        adc     8*11($a_ptr), @acc[3]\n\tlea\t0($r_ptr), $a_ptr\n\n\tmov\t@acc[6], 8*0($r_ptr)\t# omitting full reduction gives ~5%\n\tmov\t@acc[7], 8*1($r_ptr)\t# in addition-chains\n\tmov\t@acc[0], 8*2($r_ptr)\n\tmov\t@acc[1], 8*3($r_ptr)\n\tmov\t@acc[2], 8*4($r_ptr)\n\tmov\t@acc[3], 8*5($r_ptr)\n\n\tdec\t%edx\n\tjnz\t.Loop_sqr_383\n\n\tmovq\t%xmm2, %rax\t\t# b[0]\n\tmov\t$b_ptr, $n_ptr\n\tmov\t8*3(%rsp), $b_ptr\t# 6th, multiplicand argument\n\n\t#movq\t8*0($b_ptr), %rax\n\t#mov\t8*0($a_ptr), @acc[6]\n\t#mov\t8*1($a_ptr), @acc[7]\n\t#mov\t8*2($a_ptr), @acc[4]\n\t#mov\t8*3($a_ptr), @acc[5]\n\tmov\t@acc[0], @acc[4]\n\tmov\t@acc[1], @acc[5]\n\n\tcall\t__mulq_mont_384\t\t# formally one can omit full reduction\n\t\t\t\t\t# even after multiplication...\n\tlea\t8*17(%rsp), %r8\t\t# size optimization\n\tmov\t8*17(%rsp), %r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8), %r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8), %r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8), %r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8), %rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8), %rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8), %rsp\n.cfi_adjust_cfa_offset\t-8*23\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_n_mul_mont_383,.-sqr_n_mul_mont_383\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n  my $bi = \"%rbp\";\n\n$code.=<<___;\n.type\t__mulq_mont_383_nonred,\\@abi-omnipotent\n.align\t32\n__mulq_mont_383_nonred:\n\tmov\t%rax, $bi\n\tmulq\t@acc[6]\t\t\t# a[0]*b[0]\n\tmov\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tmov\t%rdx, @acc[1]\n\n\tmulq\t@acc[7]\t\t\t# a[1]*b[0]\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[2]\n\n\tmulq\t@acc[4]\t\t\t# a[2]*b[0]\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[3]\n\n\t mov\t@acc[0], @acc[7]\n\t imulq\t8(%rsp), @acc[0]\n\n\tmulq\t@acc[5]\t\t\t# a[3]*b[0]\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, 
@acc[4]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[5]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n___\nfor (my $i=0; $i<6;) {\nmy $b_next = $i<5 ? 8*($i+1).\"($b_ptr)\" : @acc[1];\n$code.=<<___;\n\t################################# reduction $i\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, @acc[7]\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, @acc[7]\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*2($n_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*3($n_ptr)\n\tadd\t@acc[7], @acc[3]\n\tadc\t\\$0, %rdx\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*4($n_ptr)\n\tadd\t%rax, @acc[4]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[7]\n\n\tmulq\t8*5($n_ptr)\n\tadd\t%rax, @acc[5]\n\tmov\t$b_next, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[7], @acc[5]\n\tadc\t%rdx, @acc[6]\n___\n    push(@acc,shift(@acc));\n$code.=<<___\tif ($i++<5);\n\t################################# Multiply by b[$i]\n\tmov\t%rax, $bi\n\tmulq\t8*0($a_ptr)\n\tadd\t%rax, @acc[0]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\tmulq\t8*1($a_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[6], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\tmulq\t8*2($a_ptr)\n\tadd\t%rax, @acc[2]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[6], @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\t mov\t@acc[0], @acc[7]\n\t imulq\t8(%rsp), @acc[0]\n\n\tmulq\t8*3($a_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[6], @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\tmulq\t8*4($a_ptr)\n\tadd\t%rax, 
@acc[4]\n\tmov\t$bi, %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[6], @acc[4]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n\n\tmulq\t8*5($a_ptr)\n\tadd\t@acc[6], @acc[5]\n\tadc\t\\$0, %rdx\n\tadd\t%rax, @acc[5]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[6]\n___\n}\n$code.=<<___;\n\tret\n.size\t__mulq_mont_383_nonred,.-__mulq_mont_383_nonred\n___\n}\n{ my $frame = 4*8 +\t# place for argument off-load +\n\t      2*384/8 +\t# place for 2 384-bit temporary vectors\n\t      8;\t# align\nmy @acc = (@acc,\"%rax\",\"%rdx\",\"%rbx\",\"%rbp\");\n\n# omitting 3 reductions gives 8-11% better performance in add-chains\n$code.=<<___;\n.globl\tsqr_mont_382x\n.hidden\tsqr_mont_382x\n.type\tsqr_mont_382x,\\@function,4,\"unwind\"\n.align\t32\nsqr_mont_382x:\n.cfi_startproc\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$1, __blst_platform_cap(%rip)\n\tjnz\tsqr_mont_382x\\$1\n#endif\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$n_ptr, 8*0(%rsp)\t# n0\n\tmov\t$b_org, $n_ptr\t\t# n_ptr\n\tmov\t$a_ptr, 8*2(%rsp)\n\tmov\t$r_ptr, 8*3(%rsp)\n\n\t#################################\n\tmov\t8*0($a_ptr), @acc[0]\t# a->re\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tmov\t@acc[0], @acc[6]\n\tadd\t8*6($a_ptr), @acc[0]\t# a->re + a->im\n\tmov\t@acc[1], @acc[7]\n\tadc\t8*7($a_ptr), @acc[1]\n\tmov\t@acc[2], @acc[8]\n\tadc\t8*8($a_ptr), @acc[2]\n\tmov\t@acc[3], @acc[9]\n\tadc\t8*9($a_ptr), @acc[3]\n\tmov\t@acc[4], @acc[10]\n\tadc\t8*10($a_ptr), @acc[4]\n\tmov\t@acc[5], @acc[11]\n\tadc\t8*11($a_ptr), @acc[5]\n\n\tsub\t8*6($a_ptr), @acc[6]\t# a->re - a->im\n\tsbb\t8*7($a_ptr), @acc[7]\n\tsbb\t8*8($a_ptr), @acc[8]\n\tsbb\t8*9($a_ptr), @acc[9]\n\tsbb\t8*10($a_ptr), 
@acc[10]\n\tsbb\t8*11($a_ptr), @acc[11]\n\tsbb\t$r_ptr, $r_ptr\t\t# borrow flag as mask\n\n\tmov\t@acc[0], 32+8*0(%rsp)\t# t0\n\tmov\t@acc[1], 32+8*1(%rsp)\n\tmov\t@acc[2], 32+8*2(%rsp)\n\tmov\t@acc[3], 32+8*3(%rsp)\n\tmov\t@acc[4], 32+8*4(%rsp)\n\tmov\t@acc[5], 32+8*5(%rsp)\n\n\tmov\t@acc[6], 32+8*6(%rsp)\t# t1\n\tmov\t@acc[7], 32+8*7(%rsp)\n\tmov\t@acc[8], 32+8*8(%rsp)\n\tmov\t@acc[9], 32+8*9(%rsp)\n\tmov\t@acc[10], 32+8*10(%rsp)\n\tmov\t@acc[11], 32+8*11(%rsp)\n\tmov\t$r_ptr,   32+8*12(%rsp)\n\n\t################################# mul_mont_384(ret->im, a->re, a->im, mod, n0);\n\t#mov\t8*2(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_ptr\t# a->im\n\n\tmov\t48($a_ptr), %rax\t# a->im\n\tmov\t8*0($a_ptr), @acc[6]\t# a->re\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[4]\n\tmov\t8*3($a_ptr), @acc[5]\n\n\tmov\t8*3(%rsp), $r_ptr\n\tcall\t__mulq_mont_383_nonred\n___\n{\nmy @acc = map(\"%r$_\",14,15,8..11,\t# output from __mulq_mont_384\n                     12,13,\"ax\",\"bx\",\"bp\",\"si\");\n$code.=<<___;\n\tadd\t@acc[0], @acc[0]\t# add with itself\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\n\tmov\t@acc[0],  8*6($r_ptr)\t# ret->im\n\tmov\t@acc[1],  8*7($r_ptr)\n\tmov\t@acc[2],  8*8($r_ptr)\n\tmov\t@acc[3],  8*9($r_ptr)\n\tmov\t@acc[4],  8*10($r_ptr)\n\tmov\t@acc[5],  8*11($r_ptr)\n___\n}\n$code.=<<___;\n\t################################# mul_mont_384(ret->re, t0, t1, mod, n0);\n\tlea\t32(%rsp), $a_ptr\t# t0\n\tlea\t32+8*6(%rsp), $b_ptr\t# t1\n\n\tmov\t32+8*6(%rsp), %rax\t# t1[0]\n\tmov\t32+8*0(%rsp), @acc[6]\t# t0[0..3]\n\tmov\t32+8*1(%rsp), @acc[7]\n\tmov\t32+8*2(%rsp), @acc[4]\n\tmov\t32+8*3(%rsp), @acc[5]\n\n\tcall\t__mulq_mont_383_nonred\n___\n{\nmy @acc = map(\"%r$_\",14,15,8..11,\t# output from __mulq_mont_384\n                     12,13,\"ax\",\"bx\",\"bp\",\"si\");\n$code.=<<___;\n\tmov\t32+8*12(%rsp), @acc[11]\t# account for sign from a->re - 
a->im\n\tmov\t32+8*0(%rsp), @acc[6]\n\tmov\t32+8*1(%rsp), @acc[7]\n\tand\t@acc[11], @acc[6]\n\tmov\t32+8*2(%rsp), @acc[8]\n\tand\t@acc[11], @acc[7]\n\tmov\t32+8*3(%rsp), @acc[9]\n\tand\t@acc[11], @acc[8]\n\tmov\t32+8*4(%rsp), @acc[10]\n\tand\t@acc[11], @acc[9]\n\tand\t@acc[11], @acc[10]\n\tand\t32+8*5(%rsp), @acc[11]\n\n\tsub\t@acc[6], @acc[0]\n\tmov\t8*0($n_ptr), @acc[6]\n\tsbb\t@acc[7], @acc[1]\n\tmov\t8*1($n_ptr), @acc[7]\n\tsbb\t@acc[8], @acc[2]\n\tmov\t8*2($n_ptr), @acc[8]\n\tsbb\t@acc[9], @acc[3]\n\tmov\t8*3($n_ptr), @acc[9]\n\tsbb\t@acc[10], @acc[4]\n\tmov\t8*4($n_ptr), @acc[10]\n\tsbb\t@acc[11], @acc[5]\n\tsbb\t@acc[11], @acc[11]\n\n\tand\t@acc[11], @acc[6]\n\tand\t@acc[11], @acc[7]\n\tand\t@acc[11], @acc[8]\n\tand\t@acc[11], @acc[9]\n\tand\t@acc[11], @acc[10]\n\tand\t8*5($n_ptr), @acc[11]\n\n\tadd\t@acc[6], @acc[0]\n\tadc\t@acc[7], @acc[1]\n\tadc\t@acc[8], @acc[2]\n\tadc\t@acc[9], @acc[3]\n\tadc\t@acc[10], @acc[4]\n\tadc\t@acc[11], @acc[5]\n\n\tmov\t@acc[0],  8*0($r_ptr)\t# ret->re\n\tmov\t@acc[1],  8*1($r_ptr)\n\tmov\t@acc[2],  8*2($r_ptr)\n\tmov\t@acc[3],  8*3($r_ptr)\n\tmov\t@acc[4],  8*4($r_ptr)\n\tmov\t@acc[5],  8*5($r_ptr)\n___\n}\n$code.=<<___;\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqr_mont_382x,.-sqr_mont_382x\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mulx_mont_256-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# \"Sparse\" in subroutine names refers to most significant limb of the\n# modulus. Though \"sparse\" is a bit of misnomer, because limitation is\n# just not-all-ones. Or in other words not larger than 2^256-2^192-1.\n# In general Montgomery multiplication algorithm can handle one of the\n# inputs being non-reduced and capped by 1<<radix_width, 1<<256 in this\n# case, rather than the modulus. Whether or not mul_mont_sparse_256, a\n# *taylored* implementation of the algorithm, can handle such input can\n# be circumstantial. For example, in most general case it depends on\n# similar \"bit sparsity\" of individual limbs of the second, fully reduced\n# multiplicand. If you can't make such assumption about the limbs, then\n# non-reduced value shouldn't be larger than \"same old\" 2^256-2^192-1.\n# This requirement can be met by conditionally subtracting \"bitwise\n# left-aligned\" modulus. For example, if modulus is 200 bits wide, you\n# would need to conditionally subtract the value of modulus<<56. Common\n# source of non-reduced values is redc_mont_256 treating 512-bit inputs.\n# Well, more specifically ones with upper half not smaller than modulus.\n# Just in case, why limitation at all and not general-purpose 256-bit\n# subroutines? 
Unlike the 384-bit case, accounting for additional carry\n# has disproportionate impact on performance, especially in adcx/adox\n# implementation.\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.globl\tmul_mont_sparse_256\\$1\n.globl\tsqr_mont_sparse_256\\$1\n.globl\tfrom_mont_256\\$1\n.globl\tredc_mont_256\\$1\n___\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n{ ############################################################## 255 bits\nmy @acc=map(\"%r$_\",(10..15));\n\n{ ############################################################## mulq\nmy ($lo,$hi)=(\"%rbp\",\"%r9\");\n\n$code.=<<___;\n.text\n\n.globl\tmulx_mont_sparse_256\n.hidden\tmulx_mont_sparse_256\n.type\tmulx_mont_sparse_256,\\@function,5,\"unwind\"\n.align\t32\nmulx_mont_sparse_256:\n.cfi_startproc\nmul_mont_sparse_256\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8,%rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\t\t# evacuate from %rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($b_org), %rdx\n\tmov\t8*0($a_ptr), @acc[4]\n\tmov\t8*1($a_ptr), @acc[5]\n\tmov\t8*2($a_ptr), $lo\n\tmov\t8*3($a_ptr), $hi\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t@acc[4], 
%rax, @acc[1]\t# a[0]*b[0]\n\tcall\t__mulx_mont_sparse_256\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmulx_mont_sparse_256,.-mulx_mont_sparse_256\n\n.globl\tsqrx_mont_sparse_256\n.hidden\tsqrx_mont_sparse_256\n.type\tsqrx_mont_sparse_256,\\@function,4,\"unwind\"\n.align\t32\nsqrx_mont_sparse_256:\n.cfi_startproc\nsqr_mont_sparse_256\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8,%rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$a_ptr, $b_ptr\n\tmov\t$n_ptr, $n0\n\tmov\t$b_org, $n_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[5]\n\tmov\t8*2($a_ptr), $lo\n\tmov\t8*3($a_ptr), $hi\n\tlea\t-128($b_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%rdx, %rax, @acc[1]\t# a[0]*a[0]\n\tcall\t__mulx_mont_sparse_256\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_mont_sparse_256,.-sqrx_mont_sparse_256\n___\n{\nmy @acc=@acc;\n$code.=<<___;\n.type\t__mulx_mont_sparse_256,\\@abi-omnipotent\n.align\t32\n__mulx_mont_sparse_256:\n\tmulx\t@acc[5], @acc[5], @acc[2]\n\tmulx\t$lo, $lo, @acc[3]\n\tadd\t@acc[5], @acc[1]\n\tmulx\t$hi, $hi, @acc[4]\n\t mov\t8($b_ptr), %rdx\n\tadc\t$lo, @acc[2]\n\tadc\t$hi, 
@acc[3]\n\tadc\t\\$0, @acc[4]\n\n___\nfor (my $i=1; $i<4; $i++) {\nmy $b_next = $i<3 ? 8*($i+1).\"($b_ptr)\" : \"%rax\";\nmy $a5 = $i==1 ? @acc[5] : $lo;\n$code.=<<___;\n\t mov\t%rax, @acc[0]\n\t imulq\t$n0, %rax\n\n\t################################# Multiply by b[$i]\n\txor\t$a5, $a5\t\t# [@acc[5]=0,] cf=0, of=0\n\tmulx\t8*0+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[1]\n\tadcx\t$hi, @acc[2]\n\n\tmulx\t8*1+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[2]\n\tadcx\t$hi, @acc[3]\n\n\tmulx\t8*2+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[3]\n\tadcx\t$hi, @acc[4]\n\n\tmulx\t8*3+128($a_ptr), $lo, $hi\n\t mov\t%rax, %rdx\n\tadox\t$lo, @acc[4]\n\tadcx\t@acc[5], $hi \t\t# cf=0\n\tadox\t$hi, @acc[5]\t\t# of=0\n\n\t################################# reduction\n\tmulx\t8*0+128($n_ptr), $lo, %rax\n\tadcx\t$lo, @acc[0]\t\t# guaranteed to be zero\n\tadox\t@acc[1], %rax\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, %rax\t\t# @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\t mov\t$b_next, %rdx\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\tadcx\t@acc[0], @acc[4]\n\tadox\t@acc[0], @acc[5]\n\tadcx\t@acc[0], @acc[5]\n\tadox\t@acc[0], @acc[0]\t# acc[5] in next iteration\n\tadc\t\\$0, @acc[0]\t\t# cf=0, of=0\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\timulq\t$n0, %rdx\n\n\t################################# last reduction\n\txor\t$lo, $lo\t\t# cf=0, of=0\n\tmulx\t8*0+128($n_ptr), @acc[0], $hi\n\tadcx\t%rax, @acc[0]\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\t mov\t@acc[1], %rdx\n\t lea\t128($n_ptr), $n_ptr\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\t mov\t@acc[2], %rax\n\tadcx\t@acc[0], @acc[4]\n\tadox\t@acc[0], @acc[5]\n\tadc\t\\$0, 
@acc[5]\n\n\t#################################\n\t# Branch-less conditional acc[1:5] - modulus\n\n\t mov\t@acc[3], $lo\n\tsub\t8*0($n_ptr), @acc[1]\n\tsbb\t8*1($n_ptr), @acc[2]\n\tsbb\t8*2($n_ptr), @acc[3]\n\t mov\t@acc[4], $hi\n\tsbb\t8*3($n_ptr), @acc[4]\n\tsbb\t\\$0, @acc[5]\n\n\tcmovc\t%rdx, @acc[1]\n\tcmovc\t%rax, @acc[2]\n\tcmovc\t$lo,  @acc[3]\n\tmov\t@acc[1], 8*0($r_ptr)\n\tcmovc\t$hi,  @acc[4]\n\tmov\t@acc[2], 8*1($r_ptr)\n\tmov\t@acc[3], 8*2($r_ptr)\n\tmov\t@acc[4], 8*3($r_ptr)\n\n\tret\n.size\t__mulx_mont_sparse_256,.-__mulx_mont_sparse_256\n___\n} }\n{ my ($n_ptr, $n0)=($b_ptr, $n_ptr);\t# arguments are \"shifted\"\n\n$code.=<<___;\n.globl\tfromx_mont_256\n.hidden\tfromx_mont_256\n.type\tfromx_mont_256,\\@function,4,\"unwind\"\n.align\t32\nfromx_mont_256:\n.cfi_startproc\nfrom_mont_256\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulx_by_1_mont_256\n\n\t#################################\n\t# Branch-less conditional acc[0:3] - modulus\n\n\t#mov\t@acc[4], %rax\t\t# __mulq_by_1_mont_256 does it\n\tmov\t@acc[5], %rdx\n\tmov\t@acc[0], @acc[2]\n\tmov\t@acc[1], @acc[3]\n\n\tsub\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($n_ptr), @acc[0]\n\tsbb\t8*3($n_ptr), @acc[1]\n\n\tcmovnc\t@acc[4], %rax\n\tcmovnc\t@acc[5], %rdx\n\tcmovnc\t@acc[0], @acc[2]\n\tmov\t%rax,    8*0($r_ptr)\n\tcmovnc\t@acc[1], @acc[3]\n\tmov\t%rdx,    8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 
8*3($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tfromx_mont_256,.-fromx_mont_256\n\n.globl\tredcx_mont_256\n.hidden\tredcx_mont_256\n.type\tredcx_mont_256,\\@function,4,\"unwind\"\n.align\t32\nredcx_mont_256:\n.cfi_startproc\nredc_mont_256\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\tcall\t__mulx_by_1_mont_256\n\n\tadd\t8*4($a_ptr), @acc[4]\t# accumulate upper half\n\tadc\t8*5($a_ptr), @acc[5]\n\tmov\t@acc[4], %rax\n\tadc\t8*6($a_ptr), @acc[0]\n\tmov\t@acc[5], %rdx\n\tadc\t8*7($a_ptr), @acc[1]\n\tsbb\t$a_ptr, $a_ptr\n\n\t#################################\n\t# Branch-less conditional acc[0:4] - modulus\n\n\tmov\t@acc[0], @acc[2]\n\tsub\t8*0($n_ptr), @acc[4]\n\tsbb\t8*1($n_ptr), @acc[5]\n\tsbb\t8*2($n_ptr), @acc[0]\n\tmov\t@acc[1], @acc[3]\n\tsbb\t8*3($n_ptr), @acc[1]\n\tsbb\t\\$0, $a_ptr\n\n\tcmovnc\t@acc[4], %rax \n\tcmovnc\t@acc[5], %rdx\n\tcmovnc\t@acc[0], @acc[2]\n\tmov\t%rax,    8*0($r_ptr)\n\tcmovnc\t@acc[1], @acc[3]\n\tmov\t%rdx,    8*1($r_ptr)\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tredcx_mont_256,.-redcx_mont_256\n___\n{\nmy 
@acc=@acc;\n\n$code.=<<___;\n.type\t__mulx_by_1_mont_256,\\@abi-omnipotent\n.align\t32\n__mulx_by_1_mont_256:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), %rax\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\n\tmov\t%rax, @acc[4]\n\timulq\t$n0, %rax\n\tmov\t%rax, @acc[0]\n___\nfor (my $i=0; $i<4; $i++) {\nmy $hi = @acc[4];\n$code.=<<___;\n\t################################# reduction $i\n\tmulq\t8*0($n_ptr)\n\tadd\t%rax, @acc[4]\t\t# guaranteed to be zero\n\tmov\t@acc[0], %rax\n\tadc\t%rdx, @acc[4]\n\n\tmulq\t8*1($n_ptr)\n\tadd\t%rax, @acc[1]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t@acc[4], @acc[1]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*2($n_ptr)\n___\n$code.=<<___\tif ($i<3);\n\t mov\t@acc[1], @acc[5]\n\t imulq\t$n0, @acc[1]\n___\n$code.=<<___;\n\tadd\t%rax, @acc[2]\n\tmov\t@acc[0], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[2]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, $hi\n\n\tmulq\t8*3($n_ptr)\n\tadd\t%rax, @acc[3]\n\tmov\t@acc[1], %rax\n\tadc\t\\$0, %rdx\n\tadd\t$hi, @acc[3]\n\tadc\t\\$0, %rdx\n\tmov\t%rdx, @acc[4]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\tret\n.size\t__mulx_by_1_mont_256,.-__mulx_by_1_mont_256\n___\n} } }\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/mulx_mont_384-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n\n$flavour = shift;\n$output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$code.=<<___ if ($flavour =~ /masm/);\n.globl\tmul_mont_384x\\$1\n.globl\tsqr_mont_384x\\$1\n.globl\tmul_382x\\$1\n.globl\tsqr_382x\\$1\n.globl\tmul_384\\$1\n.globl\tsqr_384\\$1\n.globl\tredc_mont_384\\$1\n.globl\tfrom_mont_384\\$1\n.globl\tsgn0_pty_mont_384\\$1\n.globl\tsgn0_pty_mont_384x\\$1\n.globl\tmul_mont_384\\$1\n.globl\tsqr_mont_384\\$1\n.globl\tsqr_n_mul_mont_384\\$1\n.globl\tsqr_n_mul_mont_383\\$1\n.globl\tsqr_mont_382x\\$1\n___\n\n# common argument layout\n($r_ptr,$a_ptr,$b_org,$n_ptr,$n0) = (\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\",\"%r8\");\n$b_ptr = \"%rbx\";\n\n# common accumulator layout\n@acc=map(\"%r$_\",(8..15));\n\n########################################################################\n{ my @acc=(@acc,\"%rax\",\"%rbx\",\"%rbp\",$a_ptr);\t# all registers are affected\n\t\t\t\t\t\t# except for $n_ptr and $r_ptr\n$code.=<<___;\n.text\n\n########################################################################\n# Double-width subtraction modulo n<<384, as opposite to naively\n# expected modulo n*n. 
It works because n<<384 is the actual\n# input boundary condition for Montgomery reduction, not n*n.\n# Just in case, this is duplicated, but only one module is\n# supposed to be linked...\n.type\t__subx_mod_384x384,\\@abi-omnipotent\n.align\t32\n__subx_mod_384x384:\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tmov\t8*6($a_ptr), @acc[6]\n\n\tsub\t8*0($b_org), @acc[0]\n\tmov\t8*7($a_ptr), @acc[7]\n\tsbb\t8*1($b_org), @acc[1]\n\tmov\t8*8($a_ptr), @acc[8]\n\tsbb\t8*2($b_org), @acc[2]\n\tmov\t8*9($a_ptr), @acc[9]\n\tsbb\t8*3($b_org), @acc[3]\n\tmov\t8*10($a_ptr), @acc[10]\n\tsbb\t8*4($b_org), @acc[4]\n\tmov\t8*11($a_ptr), @acc[11]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t@acc[0], 8*0($r_ptr)\n\tsbb\t8*6($b_org), @acc[6]\n\t mov\t8*0($n_ptr), @acc[0]\n\t mov\t@acc[1], 8*1($r_ptr)\n\tsbb\t8*7($b_org), @acc[7]\n\t mov\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[2], 8*2($r_ptr)\n\tsbb\t8*8($b_org), @acc[8]\n\t mov\t8*2($n_ptr), @acc[2]\n\t mov\t@acc[3], 8*3($r_ptr)\n\tsbb\t8*9($b_org), @acc[9]\n\t mov\t8*3($n_ptr), @acc[3]\n\t mov\t@acc[4], 8*4($r_ptr)\n\tsbb\t8*10($b_org), @acc[10]\n\t mov\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], 8*5($r_ptr)\n\tsbb\t8*11($b_org), @acc[11]\n\t mov\t8*5($n_ptr), @acc[5]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[0]\n\tand\t$b_org, @acc[1]\n\tand\t$b_org, @acc[2]\n\tand\t$b_org, @acc[3]\n\tand\t$b_org, @acc[4]\n\tand\t$b_org, @acc[5]\n\n\tadd\t@acc[0], @acc[6]\n\tadc\t@acc[1], @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t@acc[2], @acc[8]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tadc\t@acc[3], @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t@acc[4], @acc[10]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tadc\t@acc[5], @acc[11]\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 
8*11($r_ptr)\n\n\tret\n.size\t__subx_mod_384x384,.-__subx_mod_384x384\n\n.type\t__addx_mod_384,\\@abi-omnipotent\n.align\t32\n__addx_mod_384:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tadd\t8*0($b_org), @acc[0]\n\tadc\t8*1($b_org), @acc[1]\n\tadc\t8*2($b_org), @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t8*3($b_org), @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t8*4($b_org), @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t8*5($b_org), @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$b_org, $b_org\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $b_org\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t@acc[9],  @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__addx_mod_384,.-__addx_mod_384\n\n.type\t__subx_mod_384,\\@abi-omnipotent\n.align\t32\n__subx_mod_384:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n__subx_mod_384_a_is_loaded:\n\tsub\t8*0($b_org), @acc[0]\n\t mov\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($b_org), @acc[1]\n\t mov\t8*1($n_ptr), @acc[7]\n\tsbb\t8*2($b_org), @acc[2]\n\t mov\t8*2($n_ptr), @acc[8]\n\tsbb\t8*3($b_org), @acc[3]\n\t mov\t8*3($n_ptr), @acc[9]\n\tsbb\t8*4($b_org), @acc[4]\n\t mov\t8*4($n_ptr), @acc[10]\n\tsbb\t8*5($b_org), @acc[5]\n\t mov\t8*5($n_ptr), 
@acc[11]\n\tsbb\t$b_org, $b_org\n\n\tand\t$b_org, @acc[6]\n\tand\t$b_org, @acc[7]\n\tand\t$b_org, @acc[8]\n\tand\t$b_org, @acc[9]\n\tand\t$b_org, @acc[10]\n\tand\t$b_org, @acc[11]\n\n\tadd\t@acc[6], @acc[0]\n\tadc\t@acc[7], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[8], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[9], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tadc\t@acc[10], @acc[4]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tadc\t@acc[11], @acc[5]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__subx_mod_384,.-__subx_mod_384\n___\n}\n\n########################################################################\n# \"Complex\" multiplication and squaring. Use vanilla multiplication when\n# possible to fold reductions. I.e. instead of mul_mont, mul_mont\n# followed by add/sub_mod, it calls mul, mul, double-width add/sub_mod\n# followed by *common* reduction... For single multiplication disjoint\n# reduction is bad for performance for given vector length, yet overall\n# it's a win here, because it's one reduction less.\n{ my $frame = 5*8 +\t# place for argument off-load +\n\t      3*768/8;\t# place for 3 768-bit temporary vectors\n$code.=<<___;\n.globl\tmulx_mont_384x\n.hidden\tmulx_mont_384x\n.type\tmulx_mont_384x,\\@function,5,\"unwind\"\n.align\t32\nmulx_mont_384x:\n.cfi_startproc\nmul_mont_384x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\n\tmov\t$r_ptr, 8*4(%rsp)\t# offload arguments\n\tmov\t$a_ptr, 8*3(%rsp)\n\tmov\t$b_org, 8*2(%rsp)\n\tmov\t$n_ptr, 8*1(%rsp)\n\tmov\t$n0,    8*0(%rsp)\n\n\t################################# mul_384(t0, a->re, b->re);\n\t#lea\t0($b_ptr), $b_ptr\t# b->re\n\t#lea\t0($a_ptr), $a_ptr\t# a->re\n\tlea\t40(%rsp), $r_ptr\t# 
t0\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\t################################# mul_384(t1, a->im, b->im);\n\tlea\t48($b_ptr), $b_ptr\t# b->im\n\tlea\t128+48($a_ptr), $a_ptr\t# a->im\n\tlea\t96($r_ptr), $r_ptr\t# t1\n\tcall\t__mulx_384\n\n\t################################# mul_384(t2, a->re+a->im, b->re+b->im);\n\tmov\t8*1(%rsp), $n_ptr\n\tlea\t($b_ptr), $a_ptr\t# b->im\n\tlea\t-48($b_ptr), $b_org\t# b->re\n\tlea\t40+192+48(%rsp), $r_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tmov\t8*3(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_org\t# a->im\n\tlea\t-48($r_ptr), $r_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__addx_mod_384\n\n\tlea\t($r_ptr),$b_ptr\n\tlea\t48($r_ptr),$a_ptr\n\tcall\t__mulx_384\n\n\t################################# t2=t2-t0-t1\n\tlea\t($r_ptr), $a_ptr\t# t2\n\tlea\t40(%rsp), $b_org\t# t0\n\tmov\t8*1(%rsp), $n_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\t# t2-t0\n\n\tlea\t($r_ptr), $a_ptr\t# t2\n\tlea\t-96($r_ptr), $b_org\t# t1\n\tcall\t__subx_mod_384x384\t# t2-t0-t1\n\n\t################################# t0=t0-t1\n\tlea\t40(%rsp), $a_ptr\n\tlea\t40+96(%rsp), $b_org\n\tlea\t40(%rsp), $r_ptr\n\tcall\t__subx_mod_384x384\t# t0-t1\n\n\tlea\t($n_ptr), $b_ptr\t# n_ptr for redc_mont_384\n\n\t################################# redc_mont_384(ret->re, t0, mod, n0);\n\tlea\t40(%rsp), $a_ptr\t# t0\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for redc_mont_384\n\tmov\t8*4(%rsp), $r_ptr\t# ret->re\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\t################################# redc_mont_384(ret->im, t2, mod, n0);\n\tlea\t40+192(%rsp), $a_ptr\t# t2\n\tmov\t8*0(%rsp), %rcx\t\t# n0 for redc_mont_384\n\tlea\t48($r_ptr), $r_ptr\t# ret->im\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tlea\t$frame(%rsp), %r8\t# size 
optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmulx_mont_384x,.-mulx_mont_384x\n___\n}\n{ my $frame = 4*8 +\t# place for argument off-load +\n\t      2*384/8 +\t# place for 2 384-bit temporary vectors\n\t      8;\t# alignment\n$code.=<<___;\n.globl\tsqrx_mont_384x\n.hidden\tsqrx_mont_384x\n.type\tsqrx_mont_384x,\\@function,4,\"unwind\"\n.align\t32\nsqrx_mont_384x:\n.cfi_startproc\nsqr_mont_384x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$n_ptr, 8*0(%rsp)\t# n0\n\tmov\t$b_org, $n_ptr\t\t# n_ptr\n\t\t\t\t\t# gap for __mulx_mont_384\n\tmov\t$r_ptr, 8*2(%rsp)\n\tmov\t$a_ptr, 8*3(%rsp)\n\n\t################################# add_mod_384(t0, a->re, a->im);\n\tlea\t48($a_ptr), $b_org\t# a->im\n\tlea\t32(%rsp), $r_ptr\t# t0\n\tcall\t__addx_mod_384\n\n\t################################# sub_mod_384(t1, a->re, a->im);\n\tmov\t8*3(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_org\t# a->im\n\tlea\t32+48(%rsp), $r_ptr\t# t1\n\tcall\t__subx_mod_384\n\n\t################################# mul_mont_384(ret->im, a->re, a->im, mod, n0);\n\tmov\t8*3(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_ptr\t# a->im\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t48($a_ptr), %rdx\n\tmov\t8*0($a_ptr), %r14\t# @acc[6]\n\tmov\t8*1($a_ptr), %r15\t# @acc[7]\n\tmov\t8*2($a_ptr), %rax\t# @acc[8]\n\tmov\t8*3($a_ptr), %r12\t# @acc[4]\n\tmov\t8*4($a_ptr), %rdi\t# $lo\n\tmov\t8*5($a_ptr), %rbp\t# $hi\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op 
density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%r14, %r8, %r9\n\tcall\t__mulx_mont_384\n___\n{\nmy @acc = map(\"%r$_\",\"dx\",15,\"ax\",12,\"di\",\"bp\",\t# output from __mulx_mont_384\n                      8..11,13,14);\n$code.=<<___;\n\tadd\t@acc[0], @acc[0]\t# add with itself\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\t mov\t@acc[0], @acc[6]\n\tadc\t@acc[3], @acc[3]\n\t mov\t@acc[1], @acc[7]\n\tadc\t@acc[4], @acc[4]\n\t mov\t@acc[2], @acc[8]\n\tadc\t@acc[5], @acc[5]\n\t mov\t@acc[3], @acc[9]\n\tsbb\t$a_ptr, $a_ptr\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\t mov\t@acc[4], @acc[10]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\t mov\t@acc[5], @acc[11]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, $a_ptr\n\n\tcmovc\t@acc[6],  @acc[0]\n\tcmovc\t@acc[7],  @acc[1]\n\tcmovc\t@acc[8],  @acc[2]\n\tmov\t@acc[0], 8*6($b_ptr)\t# ret->im\n\tcmovc\t@acc[9],  @acc[3]\n\tmov\t@acc[1], 8*7($b_ptr)\n\tcmovc\t@acc[10], @acc[4]\n\tmov\t@acc[2], 8*8($b_ptr)\n\tcmovc\t@acc[11], @acc[5]\n\tmov\t@acc[3], 8*9($b_ptr)\n\tmov\t@acc[4], 8*10($b_ptr)\n\tmov\t@acc[5], 8*11($b_ptr)\n___\n}\n$code.=<<___;\n\t################################# mul_mont_384(ret->re, t0, t1, mod, n0);\n\tlea\t32(%rsp), $a_ptr\t# t0\n\tlea\t32+48(%rsp), $b_ptr\t# t1\n\n\tmov\t32+48(%rsp), %rdx\t# t1[0]\n\tmov\t32+8*0(%rsp), %r14\t# @acc[6]\n\tmov\t32+8*1(%rsp), %r15\t# @acc[7]\n\tmov\t32+8*2(%rsp), %rax\t# @acc[8]\n\tmov\t32+8*3(%rsp), %r12\t# @acc[4]\n\tmov\t32+8*4(%rsp), %rdi\t# $lo\n\tmov\t32+8*5(%rsp), %rbp\t# $hi\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%r14, %r8, %r9\n\tcall\t__mulx_mont_384\n\n\tlea\t$frame(%rsp), %r8\t# size 
optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_mont_384x,.-sqrx_mont_384x\n\n.globl\tmulx_382x\n.hidden\tmulx_382x\n.type\tmulx_382x,\\@function,4,\"unwind\"\n.align\t32\nmulx_382x:\n.cfi_startproc\nmul_382x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tlea\t96($r_ptr), $r_ptr\t# ret->im\n\tmov\t$a_ptr, 8*0(%rsp)\n\tmov\t$b_org, 8*1(%rsp)\n\tmov\t$r_ptr, 8*2(%rsp)\t# offload ret->im\n\tmov\t$n_ptr, 8*3(%rsp)\n\n\t################################# t0 = a->re + a->im\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tadd\t8*6($a_ptr), @acc[0]\n\tadc\t8*7($a_ptr), @acc[1]\n\tadc\t8*8($a_ptr), @acc[2]\n\tadc\t8*9($a_ptr), @acc[3]\n\tadc\t8*10($a_ptr), @acc[4]\n\tadc\t8*11($a_ptr), @acc[5]\n\n\tmov\t@acc[0], 32+8*0(%rsp)\n\tmov\t@acc[1], 32+8*1(%rsp)\n\tmov\t@acc[2], 32+8*2(%rsp)\n\tmov\t@acc[3], 32+8*3(%rsp)\n\tmov\t@acc[4], 32+8*4(%rsp)\n\tmov\t@acc[5], 32+8*5(%rsp)\n\n\t################################# t1 = b->re + b->im\n\tmov\t8*0($b_org), @acc[0]\n\tmov\t8*1($b_org), @acc[1]\n\tmov\t8*2($b_org), @acc[2]\n\tmov\t8*3($b_org), @acc[3]\n\tmov\t8*4($b_org), @acc[4]\n\tmov\t8*5($b_org), @acc[5]\n\n\tadd\t8*6($b_org), @acc[0]\n\tadc\t8*7($b_org), @acc[1]\n\tadc\t8*8($b_org), @acc[2]\n\tadc\t8*9($b_org), @acc[3]\n\tadc\t8*10($b_org), 
@acc[4]\n\tadc\t8*11($b_org), @acc[5]\n\n\tmov\t@acc[0], 32+8*6(%rsp)\n\tmov\t@acc[1], 32+8*7(%rsp)\n\tmov\t@acc[2], 32+8*8(%rsp)\n\tmov\t@acc[3], 32+8*9(%rsp)\n\tmov\t@acc[4], 32+8*10(%rsp)\n\tmov\t@acc[5], 32+8*11(%rsp)\n\n\t################################# mul_384(ret->im, t0, t1);\n\tlea\t32+8*0(%rsp), $a_ptr\t# t0\n\tlea\t32+8*6(%rsp), $b_ptr\t# t1\n\tcall\t__mulx_384\n\n\t################################# mul_384(ret->re, a->re, b->re);\n\tmov\t8*0(%rsp), $a_ptr\n\tmov\t8*1(%rsp), $b_ptr\n\tlea\t-96($r_ptr), $r_ptr\t# ret->re\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\t################################# mul_384(tx, a->im, b->im);\n\tlea\t48+128($a_ptr), $a_ptr\n\tlea\t48($b_ptr), $b_ptr\n\tlea\t32(%rsp), $r_ptr\n\tcall\t__mulx_384\n\n\t################################# ret->im -= tx\n\tmov\t8*2(%rsp), $a_ptr\t# restore ret->im\n\tlea\t32(%rsp), $b_org\n\tmov\t8*3(%rsp), $n_ptr\n\tmov\t$a_ptr, $r_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__subx_mod_384x384\n\n\t################################# ret->im -= ret->re\n\tlea\t0($r_ptr), $a_ptr\n\tlea\t-96($r_ptr), $b_org\n\tcall\t__subx_mod_384x384\n\n\t################################# ret->re -= tx\n\tlea\t-96($r_ptr), $a_ptr\n\tlea\t32(%rsp), $b_org\n\tlea\t-96($r_ptr), $r_ptr\n\tcall\t__subx_mod_384x384\n\n\tlea\t$frame(%rsp), %r8\t# size optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmulx_382x,.-mulx_382x\n___\n}\n{ my @acc=(@acc,\"%rax\",\"%rbx\",\"%rbp\",$b_org);\t# all registers are affected\n\t\t\t\t\t\t# except for $n_ptr and 
$r_ptr\n$code.=<<___;\n.globl\tsqrx_382x\n.hidden\tsqrx_382x\n.type\tsqrx_382x,\\@function,3,\"unwind\"\n.align\t32\nsqrx_382x:\n.cfi_startproc\nsqr_382x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$a_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n\n\t################################# t0 = a->re + a->im\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[6]\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[9]\n\tmov\t8*4($a_ptr), @acc[10]\n\tmov\t8*5($a_ptr), @acc[11]\n\n\tmov\t@acc[6], @acc[0]\n\tadd\t8*6($a_ptr), @acc[6]\n\tmov\t@acc[7], @acc[1]\n\tadc\t8*7($a_ptr), @acc[7]\n\tmov\t@acc[8], @acc[2]\n\tadc\t8*8($a_ptr), @acc[8]\n\tmov\t@acc[9], @acc[3]\n\tadc\t8*9($a_ptr), @acc[9]\n\tmov\t@acc[10], @acc[4]\n\tadc\t8*10($a_ptr), @acc[10]\n\tmov\t@acc[11], @acc[5]\n\tadc\t8*11($a_ptr), @acc[11]\n\n\tmov\t@acc[6], 8*0($r_ptr)\n\tmov\t@acc[7], 8*1($r_ptr)\n\tmov\t@acc[8], 8*2($r_ptr)\n\tmov\t@acc[9], 8*3($r_ptr)\n\tmov\t@acc[10], 8*4($r_ptr)\n\tmov\t@acc[11], 8*5($r_ptr)\n\n\t################################# t1 = a->re - a->im\n\tlea\t48($a_ptr), $b_org\n\tlea\t48($r_ptr), $r_ptr\n\tcall\t__subx_mod_384_a_is_loaded\n\n\t################################# mul_384(ret->re, t0, t1);\n\tlea\t($r_ptr), $a_ptr\n\tlea\t-48($r_ptr), $b_ptr\n\tlea\t-48($r_ptr), $r_ptr\n\tcall\t__mulx_384\n\n\t################################# mul_384(ret->im, a->re, a->im);\n\tmov\t(%rsp), $a_ptr\n\tlea\t48($a_ptr), $b_ptr\n\tlea\t96($r_ptr), $r_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmov\t8*0($r_ptr), @acc[0]\t# double ret->im\n\tmov\t8*1($r_ptr), @acc[1]\n\tmov\t8*2($r_ptr), @acc[2]\n\tmov\t8*3($r_ptr), @acc[3]\n\tmov\t8*4($r_ptr), @acc[4]\n\tmov\t8*5($r_ptr), @acc[5]\n\tmov\t8*6($r_ptr), @acc[6]\n\tmov\t8*7($r_ptr), 
@acc[7]\n\tmov\t8*8($r_ptr), @acc[8]\n\tmov\t8*9($r_ptr), @acc[9]\n\tmov\t8*10($r_ptr), @acc[10]\n\tadd\t@acc[0], @acc[0]\n\tmov\t8*11($r_ptr), @acc[11]\n\tadc\t@acc[1], @acc[1]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tadc\t@acc[2], @acc[2]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tadc\t@acc[3], @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tadc\t@acc[4], @acc[4]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tadc\t@acc[5], @acc[5]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tadc\t@acc[6], @acc[6]\n\tmov\t@acc[5], 8*5($r_ptr)\n\tadc\t@acc[7], @acc[7]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tadc\t@acc[8], @acc[8]\n\tmov\t@acc[7], 8*7($r_ptr)\n\tadc\t@acc[9], @acc[9]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tadc\t@acc[10], @acc[10]\n\tmov\t@acc[9], 8*9($r_ptr)\n\tadc\t@acc[11], @acc[11]\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tmov\t8*1(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*2(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*3(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*4(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*5(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*6(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*7(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*7\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_382x,.-sqrx_382x\n___\n}\n{ ########################################################## 384-bit mulx\nmy ($a0, $a1) = @acc[6..7];\nmy @acc = @acc[0..5];\nmy ($lo, $hi, $zr) = (\"%rax\", \"%rcx\", \"%rbp\");\n\n$code.=<<___;\n.globl\tmulx_384\n.hidden\tmulx_384\n.type\tmulx_384,\\@function,3,\"unwind\"\n.align\t32\nmulx_384:\n.cfi_startproc\nmul_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\t\t# evacuate from 
%rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_384\n\n\tmov\t0(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t16(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t24(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t32(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t40(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t48(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-48\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmulx_384,.-mulx_384\n\n.type\t__mulx_384,\\@abi-omnipotent\n.align\t32\n__mulx_384:\n\tmov\t8*0($b_ptr), %rdx\n\tmov\t8*0($a_ptr), $a0\n\tmov\t8*1($a_ptr), $a1\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\tlea\t-128($a_ptr), $a_ptr\n\n\tmulx\t$a0, @acc[1], $hi\n\txor\t$zr, $zr\n\n\tmulx\t$a1, @acc[0], $lo\n\tadcx\t$hi, @acc[0]\n\tmov\t@acc[1], 8*0($r_ptr)\n\n\tmulx\t@acc[2], @acc[1], $hi\n\tadcx\t$lo, @acc[1]\n\n\tmulx\t@acc[3], @acc[2], $lo\n\tadcx\t$hi, @acc[2]\n\n\tmulx\t@acc[4], @acc[3], $hi\n\tadcx\t$lo, @acc[3]\n\n\tmulx\t@acc[5], @acc[4], @acc[5]\n\tmov\t8*1($b_ptr), %rdx\n\tadcx\t$hi, @acc[4]\n\tadcx\t$zr, @acc[5]\n___\nfor(my $i=1; $i<6; $i++) {\nmy $b_next = $i<5 ? 
8*($i+1).\"($b_ptr)\" : \"%rax\";\n$code.=<<___;\n\tmulx\t$a0, $lo, $hi\n\tadcx\t@acc[0], $lo\n\tadox\t$hi, @acc[1]\n\tmov\t$lo, 8*$i($r_ptr)\n\n\tmulx\t$a1, @acc[0], $hi\n\tadcx\t@acc[1], $acc[0]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t128+8*2($a_ptr), @acc[1], $lo\n\tadcx\t@acc[2], @acc[1]\n\tadox\t$lo, @acc[3]\n\n\tmulx\t128+8*3($a_ptr), @acc[2], $hi\n\tadcx\t@acc[3], @acc[2]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t128+8*4($a_ptr), @acc[3], $lo\n\tadcx\t@acc[4], @acc[3]\n\tadox\t@acc[5], $lo\n\n\tmulx\t128+8*5($a_ptr), @acc[4], @acc[5]\n\tmov\t$b_next, %rdx\n\tadcx\t$lo, @acc[4]\n\tadox\t$zr, @acc[5]\n\tadcx\t$zr, @acc[5]\n___\n}\n$code.=<<___;\n\tmov\t@acc[0], 8*6($r_ptr)\n\tmov\t@acc[1], 8*7($r_ptr)\n\tmov\t@acc[2], 8*8($r_ptr)\n\tmov\t@acc[3], 8*9($r_ptr)\n\tmov\t@acc[4], 8*10($r_ptr)\n\tmov\t@acc[5], 8*11($r_ptr)\n\n\tret\n.size\t__mulx_384,.-__mulx_384\n___\n}\n{ ########################################################## 384-bit sqrx\n$code.=<<___;\n.globl\tsqrx_384\n.hidden\tsqrx_384\n.type\tsqrx_384,\\@function,2,\"unwind\"\n.align\t32\nsqrx_384:\n.cfi_startproc\nsqr_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tpush\t$r_ptr\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__sqrx_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_384,.-sqrx_384\n___\nif (0) {\n# up to 5% slower than below variant\nmy @acc=map(\"%r$_\",(\"no\",8..15,\"cx\",\"bx\"));\n   push(@acc, $a_ptr);\nmy ($lo, $hi, $carry)=(\"%rax\", \"%rbp\", 
\"%rno\");\n\n$code.=<<___;\n.type\t__sqrx_384,\\@abi-omnipotent\n.align\t32\n__sqrx_384:\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[9]\n\tmov\t8*4($a_ptr), @acc[10]\n\n\t#########################################\n\tmulx\t@acc[7], @acc[1], $lo\t\t# a[1]*a[0]\n\t mov\t8*5($a_ptr), @acc[11]\n\tmulx\t@acc[8], @acc[2], $hi\t\t# a[2]*a[0]\n\tadd\t$lo, @acc[2]\n\tmulx\t@acc[9], @acc[3], $lo\t\t# a[3]*a[0]\n\tadc\t$hi, @acc[3]\n\tmulx\t@acc[10], @acc[4], $hi\t\t# a[4]*a[0]\n\tadc\t$lo, @acc[4]\n\tmulx\t@acc[11], @acc[5], @acc[6]\t# a[5]*a[0]\n\tadc\t$hi, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\tmulx\t%rdx, $lo, $hi\t\t\t# a[0]*a[0]\n\t mov\t@acc[7], %rdx\n\txor\t@acc[7], @acc[7]\n\tadd\t@acc[1], @acc[1]\t\t# double acc[1]\n\tadc\t\\$0, @acc[7]\n\tadd\t$hi, @acc[1]\n\tadc\t\\$0, @acc[7]\n\tmov\t$lo, 8*0($r_ptr)\n\tmov\t@acc[1], 8*1($r_ptr)\n___\n($carry, @acc[7]) = (@acc[7], @acc[1]);\n$code.=<<___;\n\t#########################################\n\txor\t@acc[7], @acc[7]\n\tmulx\t@acc[8], $lo, $hi\t\t# a[2]*a[1]\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t@acc[9], $lo, $hi\t\t# a[3]*a[1]\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t@acc[10], $lo, $hi\t\t# a[4]*a[1]\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[1]\n\tadcx\t$lo, @acc[6]\n\tadox\t@acc[7], $hi\n\tadcx\t$hi, @acc[7]\n\n\tmulx\t%rdx, $lo, $hi\t\t\t# a[1]*a[1]\n\t mov\t@acc[8], %rdx\n\txor\t@acc[8], @acc[8]\n\tadox\t@acc[2], @acc[2]\t\t# double acc[2:3]\n\tadcx\t$carry, $lo\t\t\t# can't carry\n\tadox\t@acc[3], @acc[3]\n\tadcx\t$lo, @acc[2]\n\tadox\t@acc[8], @acc[8]\n\tadcx\t$hi, @acc[3]\n\tadc\t\\$0, @acc[8]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n___\n($carry,@acc[8])=(@acc[8],$carry);\n$code.=<<___;\n\t#########################################\n\txor\t@acc[8], @acc[8]\n\tmulx\t@acc[9], $lo, $hi\t\t# a[3]*a[2]\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\n\tmulx\t@acc[10], 
$lo, $hi\t\t# a[4]*a[2]\n\tadcx\t$lo, @acc[6]\n\tadox\t$hi, @acc[7]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[2]\n\tadcx\t$lo, @acc[7]\n\tadox\t@acc[8], $hi\n\tadcx\t$hi, @acc[8]\n\n\tmulx\t%rdx, $lo, $hi\t\t\t# a[2]*a[2]\n\t mov\t@acc[9], %rdx\n\txor\t@acc[9], @acc[9]\n\tadox\t@acc[4], @acc[4]\t\t# double acc[4:5]\n\tadcx\t$carry, $lo\t\t\t# can't carry\n\tadox\t@acc[5], @acc[5]\n\tadcx\t$lo, @acc[4]\n\tadox\t@acc[9], @acc[9]\n\tadcx\t$hi, @acc[5]\n\tadc\t\\$0, $acc[9]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n___\n($carry,@acc[9])=(@acc[9],$carry);\n$code.=<<___;\n\t#########################################\n\txor\t@acc[9], @acc[9]\n\tmulx\t@acc[10], $lo, $hi\t\t# a[4]*a[3]\n\tadcx\t$lo, @acc[7]\n\tadox\t$hi, @acc[8]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[3]\n\tadcx\t$lo, @acc[8]\n\tadox\t@acc[9], $hi\n\tadcx\t$hi, @acc[9]\n\n\tmulx\t%rdx, $lo, $hi\n\t mov\t@acc[10], %rdx\n\txor\t@acc[10], @acc[10]\n\tadox\t@acc[6], @acc[6]\t\t# double acc[6:7]\n\tadcx\t$carry, $lo\t\t\t# can't carry\n\tadox\t@acc[7], @acc[7]\n\tadcx\t$lo, @acc[6]\n\tadox\t@acc[10], @acc[10]\n\tadcx\t$hi, @acc[7]\n\tadc\t\\$0, $acc[10]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tmov\t@acc[7], 8*7($r_ptr)\n___\n($carry,@acc[10])=(@acc[10],$carry);\n$code.=<<___;\n\t#########################################\n\tmulx\t@acc[11], $lo, @acc[10]\t\t# a[5]*a[4]\n\tadd\t$lo, @acc[9]\n\tadc\t\\$0, @acc[10]\n\n\tmulx\t%rdx, $lo, $hi\t\t\t# a[4]*a[4]\n\t mov\t@acc[11], %rdx\n\txor\t@acc[11], @acc[11]\n\tadox\t@acc[8], @acc[8]\t\t# double acc[8:10]\n\tadcx\t$carry, $lo\t\t\t# can't carry\n\tadox\t@acc[9], @acc[9]\n\tadcx\t$lo, @acc[8]\n\tadox\t@acc[10], @acc[10]\n\tadcx\t$hi, @acc[9]\n\tadox\t@acc[11], @acc[11]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tmov\t@acc[9], 8*9($r_ptr)\n\n\t#########################################\n\tmulx\t%rdx, $lo, $hi\t\t\t# a[5]*a[5]\n\tadcx\t$lo, @acc[10]\n\tadcx\t$hi, @acc[11]\n\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 
8*11($r_ptr)\n\n\tret\n.size\t__sqrx_384,.-__sqrx_384\n___\n} else {\nmy @acc=map(\"%r$_\",(\"no\",8..15,\"cx\",\"bx\",\"bp\"));\nmy ($lo, $hi)=($r_ptr, \"%rax\");\n\n$code.=<<___;\n.type\t__sqrx_384,\\@abi-omnipotent\n.align\t32\n__sqrx_384:\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[9]\n\tmov\t8*4($a_ptr), @acc[10]\n\n\t#########################################\n\tmulx\t@acc[7], @acc[1], $lo\t\t# a[1]*a[0]\n\t mov\t8*5($a_ptr), @acc[11]\n\tmulx\t@acc[8], @acc[2], $hi\t\t# a[2]*a[0]\n\tadd\t$lo, @acc[2]\n\tmulx\t@acc[9], @acc[3], $lo\t\t# a[3]*a[0]\n\tadc\t$hi, @acc[3]\n\tmulx\t@acc[10], @acc[4], $hi\t\t# a[4]*a[0]\n\tadc\t$lo, @acc[4]\n\tmulx\t@acc[11], @acc[5], @acc[6]\t# a[5]*a[0]\n\t mov\t@acc[7], %rdx\n\tadc\t$hi, @acc[5]\n\tadc\t\\$0, @acc[6]\n\n\t#########################################\n\txor\t@acc[7], @acc[7]\n\tmulx\t@acc[8], $lo, $hi\t\t# a[2]*a[1]\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t@acc[9], $lo, $hi\t\t# a[3]*a[1]\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t@acc[10], $lo, $hi\t\t# a[4]*a[1]\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[1]\n\t mov\t@acc[8], %rdx\n\tadcx\t$lo, @acc[6]\n\tadox\t@acc[7], $hi\n\tadcx\t$hi, @acc[7]\n\n\t#########################################\n\txor\t@acc[8], @acc[8]\n\tmulx\t@acc[9], $lo, $hi\t\t# a[3]*a[2]\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\n\tmulx\t@acc[10], $lo, $hi\t\t# a[4]*a[2]\n\tadcx\t$lo, @acc[6]\n\tadox\t$hi, @acc[7]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[2]\n\t mov\t@acc[9], %rdx\n\tadcx\t$lo, @acc[7]\n\tadox\t@acc[8], $hi\n\tadcx\t$hi, @acc[8]\n\n\t#########################################\n\txor\t@acc[9], @acc[9]\n\tmulx\t@acc[10], $lo, $hi\t\t# a[4]*a[3]\n\tadcx\t$lo, @acc[7]\n\tadox\t$hi, @acc[8]\n\n\tmulx\t@acc[11], $lo, $hi\t\t# a[5]*a[3]\n\t mov\t@acc[10], %rdx\n\tadcx\t$lo, @acc[8]\n\tadox\t@acc[9], $hi\n\tadcx\t$hi, 
@acc[9]\n\n\t#########################################\n\tmulx\t@acc[11], $lo, @acc[10]\t\t# a[5]*a[4]\n\t mov\t8*0($a_ptr), %rdx\n\tadd\t$lo, @acc[9]\n\t mov\t8(%rsp), $r_ptr\t\t\t# restore $r_ptr\n\tadc\t\\$0, @acc[10]\n\n\t######################################### double acc[1:10]\n\txor\t@acc[11], @acc[11]\n\tadcx\t@acc[1], @acc[1]\n\tadcx\t@acc[2], @acc[2]\n\tadcx\t@acc[3], @acc[3]\n\tadcx\t@acc[4], @acc[4]\n\tadcx\t@acc[5], @acc[5]\n\n\t######################################### accumulate a[i]*a[i]\n\tmulx\t%rdx, %rdx, $hi \t\t# a[0]*a[0]\n\tmov\t%rdx, 8*0($r_ptr)\n\tmov\t8*1($a_ptr), %rdx\n\tadox\t$hi, @acc[1]\n\tmov\t@acc[1], 8*1($r_ptr)\n\n\tmulx\t%rdx, @acc[1], $hi\t\t# a[1]*a[1]\n\tmov\t8*2($a_ptr), %rdx\n\tadox\t@acc[1], @acc[2]\n\tadox\t$hi,     @acc[3]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tmov\t@acc[3], 8*3($r_ptr)\n\n\tmulx\t%rdx, @acc[1], @acc[2]\t\t# a[2]*a[2]\n\tmov\t8*3($a_ptr), %rdx\n\tadox\t@acc[1], @acc[4]\n\tadox\t@acc[2], @acc[5]\n\tadcx\t@acc[6], @acc[6]\n\tadcx\t@acc[7], @acc[7]\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tmulx\t%rdx, @acc[1], @acc[2]\t\t# a[3]*a[3]\n\tmov\t8*4($a_ptr), %rdx\n\tadox\t@acc[1], @acc[6]\n\tadox\t@acc[2], @acc[7]\n\tadcx\t@acc[8], @acc[8]\n\tadcx\t@acc[9], @acc[9]\n\tmov\t@acc[6], 8*6($r_ptr)\n\tmov\t@acc[7], 8*7($r_ptr)\n\n\tmulx\t%rdx, @acc[1], @acc[2]\t\t# a[4]*a[4]\n\tmov\t8*5($a_ptr), %rdx\n\tadox\t@acc[1], @acc[8]\n\tadox\t@acc[2], @acc[9]\n\tadcx\t@acc[10], @acc[10]\n\tadcx\t@acc[11], @acc[11]\n\tmov\t@acc[8], 8*8($r_ptr)\n\tmov\t@acc[9], 8*9($r_ptr)\n\n\tmulx\t%rdx, @acc[1], @acc[2]\t\t# a[5]*a[5]\n\tadox\t@acc[1], @acc[10]\n\tadox\t@acc[2], @acc[11]\n\n\tmov\t@acc[10], 8*10($r_ptr)\n\tmov\t@acc[11], 8*11($r_ptr)\n\n\tret\n.size\t__sqrx_384,.-__sqrx_384\n___\n}\n\n{ ########################################################## 384-bit redcx_mont\nmy ($n_ptr, $n0)=($b_ptr, $n_ptr);      # arguments are \"shifted\"\nmy ($lo, $hi) = (\"%rax\", 
\"%rbp\");\n\n$code.=<<___;\n########################################################################\n# void redcx_mont_384(uint64_t ret[6], const uint64_t a[12],\n#                     uint64_t m[6], uint64_t n0);\n.globl\tredcx_mont_384\n.hidden\tredcx_mont_384\n.type\tredcx_mont_384,\\@function,4,\"unwind\"\n.align\t32\nredcx_mont_384:\n.cfi_startproc\nredc_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\tcall\t__redx_tail_mont_384\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tredcx_mont_384,.-redcx_mont_384\n\n########################################################################\n# void fromx_mont_384(uint64_t ret[6], const uint64_t a[6],\n#                    uint64_t m[6], uint64_t n0);\n.globl\tfromx_mont_384\n.hidden\tfromx_mont_384\n.type\tfromx_mont_384,\\@function,4,\"unwind\"\n.align\t32\nfromx_mont_384:\n.cfi_startproc\nfrom_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$b_org, $n_ptr\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\t#################################\n\t# Branch-less conditional acc[0:6] - modulus\n\n\tmov\t@acc[6], %rax\n\tmov\t@acc[7], %rcx\n\tmov\t@acc[0], 
%rdx\n\tmov\t@acc[1], %rbp\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[7]\n\tmov\t@acc[2], @acc[5]\n\tsbb\t8*2($n_ptr), @acc[0]\n\tsbb\t8*3($n_ptr), @acc[1]\n\tsbb\t8*4($n_ptr), @acc[2]\n\tmov\t@acc[3], $a_ptr\n\tsbb\t8*5($n_ptr), @acc[3]\n\n\tcmovc\t%rax, @acc[6]\n\tcmovc\t%rcx, @acc[7]\n\tcmovc\t%rdx, @acc[0]\n\tmov\t@acc[6], 8*0($r_ptr)\n\tcmovc\t%rbp, @acc[1]\n\tmov\t@acc[7], 8*1($r_ptr)\n\tcmovc\t@acc[5], @acc[2]\n\tmov\t@acc[0], 8*2($r_ptr)\n\tcmovc\t$a_ptr,  @acc[3]\n\tmov\t@acc[1], 8*3($r_ptr)\n\tmov\t@acc[2], 8*4($r_ptr)\n\tmov\t@acc[3], 8*5($r_ptr)\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tfromx_mont_384,.-fromx_mont_384\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n\n$code.=<<___;\n.type\t__mulx_by_1_mont_384,\\@abi-omnipotent\n.align\t32\n__mulx_by_1_mont_384:\n\tmov\t8*0($a_ptr), @acc[0]\n\tmov\t$n0, %rdx\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n___\nfor (my $i=0; $i<6; $i++) {\n$code.=<<___;\n\timulq\t@acc[0], %rdx\n\n\t################################# reduction $i\n\txor\t@acc[6], @acc[6]\t# @acc[6]=0, cf=0, of=0\n\tmulx\t8*0($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[0]\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t8*4($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t8*5($n_ptr), $lo, $hi\n\t mov\t$n0, %rdx\n\tadcx\t$lo, @acc[5]\n\tadox\t@acc[6], $hi\n\tadcx\t$hi, 
@acc[6]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\tret\n.size\t__mulx_by_1_mont_384,.-__mulx_by_1_mont_384\n\n.type\t__redx_tail_mont_384,\\@abi-omnipotent\n.align\t32\n__redx_tail_mont_384:\n\tadd\t8*6($a_ptr), @acc[0]\t# accumulate upper half\n\tmov\t@acc[0], %rax\n\tadc\t8*7($a_ptr), @acc[1]\n\tadc\t8*8($a_ptr), @acc[2]\n\tadc\t8*9($a_ptr), @acc[3]\n\tmov\t@acc[1], %rcx\n\tadc\t8*10($a_ptr), @acc[4]\n\tadc\t8*11($a_ptr), @acc[5]\n\tsbb\t@acc[6], @acc[6]\n\n\t#################################\n\t# Branch-less conditional acc[0:6] - modulus\n\n\tmov\t@acc[2], %rdx\n\tmov\t@acc[3], %rbp\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tmov\t@acc[4], @acc[7]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tmov\t@acc[5], $a_ptr\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, @acc[6]\n\n\tcmovc\t%rax, @acc[0]\n\tcmovc\t%rcx, @acc[1]\n\tcmovc\t%rdx, @acc[2]\n\tmov\t@acc[0], 8*0($r_ptr)\n\tcmovc\t%rbp, @acc[3]\n\tmov\t@acc[1], 8*1($r_ptr)\n\tcmovc\t@acc[7], @acc[4]\n\tmov\t@acc[2], 8*2($r_ptr)\n\tcmovc\t$a_ptr,  @acc[5]\n\tmov\t@acc[3], 8*3($r_ptr)\n\tmov\t@acc[4], 8*4($r_ptr)\n\tmov\t@acc[5], 8*5($r_ptr)\n\n\tret\n.size\t__redx_tail_mont_384,.-__redx_tail_mont_384\n\n.globl\tsgn0x_pty_mont_384\n.hidden\tsgn0x_pty_mont_384\n.type\tsgn0x_pty_mont_384,\\@function,3,\"unwind\"\n.align\t32\nsgn0x_pty_mont_384:\n.cfi_startproc\nsgn0_pty_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$a_ptr, $n_ptr\n\tlea\t0($r_ptr), $a_ptr\n\tmov\t$b_org, $n0\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\txor\t%rax, %rax\n\tmov\t@acc[0], @acc[7]\n\tadd\t@acc[0], @acc[0]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], 
@acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[0]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, %rax\n\tor\t@acc[7], %rax\t\t# pack sign and parity\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0x_pty_mont_384,.-sgn0x_pty_mont_384\n\n.globl\tsgn0x_pty_mont_384x\n.hidden\tsgn0x_pty_mont_384x\n.type\tsgn0x_pty_mont_384x,\\@function,3,\"unwind\"\n.align\t32\nsgn0x_pty_mont_384x:\n.cfi_startproc\nsgn0_pty_mont_384x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$8, %rsp\n.cfi_adjust_cfa_offset\t8\n.cfi_end_prologue\n\n\tmov\t$a_ptr, $n_ptr\n\tlea\t48($r_ptr), $a_ptr\t# sgn0(a->im)\n\tmov\t$b_org, $n0\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tcall\t__mulx_by_1_mont_384\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], @acc[0]\n\tor\t@acc[5], @acc[0]\n\n\tlea\t0($r_ptr), $a_ptr\t# sgn0(a->re)\n\txor\t$r_ptr, $r_ptr\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, $r_ptr\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, 
$r_ptr\n\n\tmov\t@acc[0], 0(%rsp)\t# a->im is zero or not\n\tnot\t$r_ptr\t\t\t# 2*x > p, which means \"negative\"\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, $r_ptr\n\tor\t@acc[7], $r_ptr\t\t# pack sign and parity\n\n\tcall\t__mulx_by_1_mont_384\n\n\tmov\t@acc[0], @acc[6]\n\tor\t@acc[1], @acc[0]\n\tor\t@acc[2], @acc[0]\n\tor\t@acc[3], @acc[0]\n\tor\t@acc[4], @acc[0]\n\tor\t@acc[5], @acc[0]\n\n\txor\t%rax, %rax\n\tmov\t@acc[6], @acc[7]\n\tadd\t@acc[6], @acc[6]\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\tadc\t\\$0, %rax\n\n\tsub\t8*0($n_ptr), @acc[6]\n\tsbb\t8*1($n_ptr), @acc[1]\n\tsbb\t8*2($n_ptr), @acc[2]\n\tsbb\t8*3($n_ptr), @acc[3]\n\tsbb\t8*4($n_ptr), @acc[4]\n\tsbb\t8*5($n_ptr), @acc[5]\n\tsbb\t\\$0, %rax\n\n\tmov\t0(%rsp), @acc[6]\n\n\tnot\t%rax\t\t\t# 2*x > p, which means \"negative\"\n\n\ttest\t@acc[0], @acc[0]\n\tcmovz\t$r_ptr, @acc[7]\t\t# a->re==0? prty(a->im) : prty(a->re)\n\n\ttest\t@acc[6], @acc[6]\n\tcmovnz\t$r_ptr, %rax\t\t# a->im!=0? 
sgn0(a->im) : sgn0(a->re)\n\n\tand\t\\$1, @acc[7]\n\tand\t\\$2, %rax\n\tor\t@acc[7], %rax\t\t# pack sign and parity\n\n\tmov\t8(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t16(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t24(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t32(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t40(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t48(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t56(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-56\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsgn0x_pty_mont_384x,.-sgn0x_pty_mont_384x\n___\n} }\n\n{ ########################################################## mulx/sqrx_mont\nmy @acc = (@acc, \"%rax\");\nmy ($lo,$hi)=(\"%rdi\",\"%rbp\");\n\n$code.=<<___;\n.globl\tmulx_mont_384\n.hidden\tmulx_mont_384\n.type\tmulx_mont_384,\\@function,5,\"unwind\"\n.align\t32\nmulx_mont_384:\n.cfi_startproc\nmul_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tlea\t-8*3(%rsp), %rsp\n.cfi_adjust_cfa_offset\t8*3\n.cfi_end_prologue\n\n\tmov\t$b_org, $b_ptr\t\t# evacuate from %rdx\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($b_org), %rdx\n\tmov\t8*0($a_ptr), @acc[6]\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[4]\n\tmov\t$r_ptr, 8*2(%rsp)\n\tmov\t8*4($a_ptr), $lo\n\tmov\t8*5($a_ptr), $hi\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\tmov\t$n0, (%rsp)\n\n\tmulx\t@acc[6],@acc[0],@acc[1]\t# 
a[0]*b[0]\n\tcall\t__mulx_mont_384\n\n\tmov\t8*3(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*4(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*5(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*6(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*7(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*8(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*9(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tmulx_mont_384,.-mulx_mont_384\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n\n$code.=<<___;\n.type\t__mulx_mont_384,\\@abi-omnipotent\n.align\t32\n__mulx_mont_384:\n.cfi_startproc\n\tmulx\t@acc[7], @acc[6], @acc[2]\n\tmulx\t@acc[8], @acc[7], @acc[3]\n\tadd\t@acc[6], @acc[1]\n\tmulx\t@acc[4], @acc[8], @acc[4]\n\tadc\t@acc[7], @acc[2]\n\tmulx\t$lo, $lo, @acc[5]\n\tadc\t@acc[8], @acc[3]\n\tmulx\t$hi, $hi, @acc[6]\n\t mov\t8($b_ptr), %rdx\n\tadc\t$lo, @acc[4]\n\tadc\t$hi, @acc[5]\n\tadc\t\\$0, @acc[6]\n\txor\t@acc[7], @acc[7]\n\n___\nfor (my $i=1; $i<6; $i++) {\nmy $tt = $i==1 ? @acc[7] : $hi;\nmy $b_next = $i<5 ? 
8*($i+1).\"($b_ptr)\" : @acc[1];\n$code.=<<___;\n\t mov\t@acc[0], 16(%rsp)\n\t imulq\t8(%rsp), @acc[0]\n\n\t################################# Multiply by b[$i]\n\txor\t@acc[8], @acc[8]\t# @acc[8]=0, cf=0, of=0\n\tmulx\t8*0+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[1]\n\tadcx\t$hi, @acc[2]\n\n\tmulx\t8*1+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[2]\n\tadcx\t$hi, @acc[3]\n\n\tmulx\t8*2+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[3]\n\tadcx\t$hi, @acc[4]\n\n\tmulx\t8*3+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[4]\n\tadcx\t$hi, @acc[5]\n\n\tmulx\t8*4+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[5]\n\tadcx\t$hi, @acc[6]\n\n\tmulx\t8*5+128($a_ptr), $lo, $hi\n\t mov\t@acc[0], %rdx\n\tadox\t$lo, @acc[6]\n\tadcx\t$hi, @acc[7]\t\t# cf=0\n\tadox\t@acc[8], @acc[7]\n\tadox\t@acc[8], @acc[8]\n\n\t################################# reduction\n\txor\t@acc[0], @acc[0]\t# acc[0]=0, cf=0, of=0\n\tmulx\t8*0+128($n_ptr), $lo, $hi\n\tadcx\t16(%rsp), $lo\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t8*4+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t8*5+128($n_ptr), $lo, $hi\n\t mov\t$b_next, %rdx\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\tadcx\t@acc[0], @acc[6]\n\tadox\t@acc[0], @acc[7]\n\tadcx\t@acc[0], @acc[7]\n\tadox\t@acc[0], @acc[8]\n\tadcx\t@acc[0], @acc[8]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\timulq\t8(%rsp), %rdx\n\tmov\t8*3(%rsp), $b_ptr\t# restore $r_ptr\n\n\t################################# last reduction\n\txor\t@acc[8], @acc[8]\t# @acc[8]=0, cf=0, of=0\n\tmulx\t8*0+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[0]\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, 
$hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\t mov\t@acc[2], @acc[0]\n\n\tmulx\t8*4+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\t mov\t@acc[3], $a_ptr\n\n\tmulx\t8*5+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\t mov\t@acc[1], %rdx\n\tadcx\t@acc[8], @acc[6]\n\tadox\t@acc[8], @acc[7]\n\t lea\t128($n_ptr), $n_ptr\n\t mov\t@acc[4], @acc[8]\n\tadc\t\\$0, @acc[7]\n\n\t#################################\n\t# Branch-less conditional acc[1:7] - modulus\n\n\tsub\t8*0($n_ptr), @acc[1]\n\tsbb\t8*1($n_ptr), @acc[2]\n\t mov\t@acc[5], $lo\n\tsbb\t8*2($n_ptr), @acc[3]\n\tsbb\t8*3($n_ptr), @acc[4]\n\tsbb\t8*4($n_ptr), @acc[5]\n\t mov\t@acc[6], $hi\n\tsbb\t8*5($n_ptr), @acc[6]\n\tsbb\t\\$0, @acc[7]\n\n\tcmovnc\t@acc[1], %rdx\n\tcmovc\t@acc[0], @acc[2]\n\tcmovc\t$a_ptr, @acc[3]\n\tcmovnc\t@acc[4], @acc[8]\n\tmov\t%rdx, 8*0($b_ptr)\n\tcmovnc\t@acc[5], $lo\n\tmov\t@acc[2], 8*1($b_ptr)\n\tcmovnc\t@acc[6], $hi\n\tmov\t@acc[3], 8*2($b_ptr)\n\tmov\t@acc[8], 8*3($b_ptr)\n\tmov\t$lo, 8*4($b_ptr)\n\tmov\t$hi, 8*5($b_ptr)\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=%rsi\n.cfi_endproc\n.size\t__mulx_mont_384,.-__mulx_mont_384\n___\n}\n$code.=<<___;\n.globl\tsqrx_mont_384\n.hidden\tsqrx_mont_384\n.type\tsqrx_mont_384,\\@function,4,\"unwind\"\n.align\t32\nsqrx_mont_384:\n.cfi_startproc\nsqr_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tlea\t-8*3(%rsp), %rsp\n.cfi_adjust_cfa_offset\t8*3\n.cfi_end_prologue\n\n\tmov\t$n_ptr, $n0\t\t# n0\n\tlea\t-128($b_org), $n_ptr\t# control u-op density\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t8*3($a_ptr), @acc[4]\n\tmov\t$r_ptr, 8*2(%rsp)\n\tmov\t8*4($a_ptr), 
$lo\n\tmov\t8*5($a_ptr), $hi\n\n\tlea\t($a_ptr), $b_ptr\n\tmov\t$n0, (%rsp)\t\t# n0\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op density\n\n\tmulx\t%rdx, @acc[0], @acc[1]\t# a[0]*a[0]\n\tcall\t__mulx_mont_384\t\t# as fast as dedicated squaring\n\n\tmov\t8*3(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*4(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*5(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*6(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*7(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*8(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*9(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*9\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_mont_384,.-sqrx_mont_384\n\n.globl\tsqrx_n_mul_mont_384\n.hidden\tsqrx_n_mul_mont_384\n.type\tsqrx_n_mul_mont_384,\\@function,6,\"unwind\"\n.align\t32\nsqrx_n_mul_mont_384:\n.cfi_startproc\nsqr_n_mul_mont_384\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tlea\t-8*5(%rsp), %rsp\n.cfi_adjust_cfa_offset\t8*5\n.cfi_end_prologue\n\n\tmov\t$b_org, @acc[2]\t\t# loop counter\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t$a_ptr, $b_ptr\n\tmov\t8*3($a_ptr), @acc[4]\n\tmov\t$r_ptr, 8*2(%rsp)\t# to __mulx_mont_384\n\tmov\t8*4($a_ptr), $lo\n\tmov\t8*5($a_ptr), $hi\n\n\tmov\t$n0, (%rsp)\n\tmov\t%r9, 8*3(%rsp)\t\t# 6th, multiplicand argument\n\tmovq\t8*0(%r9), %xmm2\t\t# prefetch b[0]\n\n.Loop_sqrx_384:\n\tmovd\t@acc[2]d, %xmm1\n\tlea\t-128($b_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%rdx, @acc[0], @acc[1]\t# a[0]*a[0]\n\tcall\t__mulx_mont_384\n\n\tmovd\t%xmm1, @acc[2]d\n\tdec\t@acc[2]d\n\tjnz\t.Loop_sqrx_384\n\n\tmov\t%rdx, @acc[6]\n\tmovq\t%xmm2, %rdx\t\t# b[0]\n\tlea\t-128($b_ptr), $a_ptr\t# control u-op density\n\tmov\t8*3(%rsp), $b_ptr\t# 6th, multiplicand 
argument\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t@acc[6],@acc[0],@acc[1]\t# a[0]*b[0]\n\tcall\t__mulx_mont_384\n\n\tmov\t8*5(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*6(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*7(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*8(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*9(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*10(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*11(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_n_mul_mont_384,.-sqrx_n_mul_mont_384\n\n.globl\tsqrx_n_mul_mont_383\n.hidden\tsqrx_n_mul_mont_383\n.type\tsqrx_n_mul_mont_383,\\@function,6,\"unwind\"\n.align\t32\nsqrx_n_mul_mont_383:\n.cfi_startproc\nsqr_n_mul_mont_383\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tlea\t-8*5(%rsp), %rsp\n.cfi_adjust_cfa_offset\t8*5\n.cfi_end_prologue\n\n\tmov\t$b_org, @acc[2]\t\t# loop counter\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), %rdx\n\tmov\t8*1($a_ptr), @acc[7]\n\tmov\t8*2($a_ptr), @acc[8]\n\tmov\t$a_ptr, $b_ptr\n\tmov\t8*3($a_ptr), @acc[4]\n\tmov\t$r_ptr, 8*2(%rsp)\t# to __mulx_mont_383_nonred\n\tmov\t8*4($a_ptr), $lo\n\tmov\t8*5($a_ptr), $hi\n\n\tmov\t$n0, (%rsp)\n\tmov\t%r9, 8*3(%rsp)\t\t# 6th, multiplicand argument\n\tmovq\t8*0(%r9), %xmm2\t\t# prefetch b[0]\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n.Loop_sqrx_383:\n\tmovd\t@acc[2]d, %xmm1\n\tlea\t-128($b_ptr), $a_ptr\t# control u-op density\n\n\tmulx\t%rdx, @acc[0], @acc[1]\t# a[0]*a[0]\n\tcall\t__mulx_mont_383_nonred\t# omitting full reduction gives ~15%\n\t\t\t\t\t# in addition-chains\n\tmovd\t%xmm1, @acc[2]d\n\tdec\t@acc[2]d\n\tjnz\t.Loop_sqrx_383\n\n\tmov\t%rdx, @acc[6]\n\tmovq\t%xmm2, %rdx\t\t# b[0]\n\tlea\t-128($b_ptr), $a_ptr\t# control u-op density\n\tmov\t8*3(%rsp), $b_ptr\t# 6th, multiplicand argument\n\n\tmulx\t@acc[6], @acc[0], 
@acc[1]\t# a[0]*b[0]\n\tcall\t__mulx_mont_384\n\n\tmov\t8*5(%rsp),%r15\n.cfi_restore\t%r15\n\tmov\t8*6(%rsp),%r14\n.cfi_restore\t%r14\n\tmov\t8*7(%rsp),%r13\n.cfi_restore\t%r13\n\tmov\t8*8(%rsp),%r12\n.cfi_restore\t%r12\n\tmov\t8*9(%rsp),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*10(%rsp),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*11(%rsp),%rsp\n.cfi_adjust_cfa_offset\t-8*11\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_n_mul_mont_383,.-sqrx_n_mul_mont_383\n___\n{ my @acc=@acc;\t\t\t\t# will be rotated locally\n\n$code.=<<___;\n.type\t__mulx_mont_383_nonred,\\@abi-omnipotent\n.align\t32\n__mulx_mont_383_nonred:\n.cfi_startproc\n\tmulx\t@acc[7], @acc[6], @acc[2]\n\tmulx\t@acc[8], @acc[7], @acc[3]\n\tadd\t@acc[6], @acc[1]\n\tmulx\t@acc[4], @acc[8], @acc[4]\n\tadc\t@acc[7], @acc[2]\n\tmulx\t$lo, $lo, @acc[5]\n\tadc\t@acc[8], @acc[3]\n\tmulx\t$hi, $hi, @acc[6]\n\t mov\t8($b_ptr), %rdx\n\tadc\t$lo, @acc[4]\n\tadc\t$hi, @acc[5]\n\tadc\t\\$0, @acc[6]\n___\nfor (my $i=1; $i<6; $i++) {\nmy $tt = $i==1 ? @acc[7] : $hi;\nmy $b_next = $i<5 ? 
8*($i+1).\"($b_ptr)\" : @acc[1];\n$code.=<<___;\n\t mov\t@acc[0], @acc[8]\n\t imulq\t8(%rsp), @acc[0]\n\n\t################################# Multiply by b[$i]\n\txor\t@acc[7], @acc[7]\t# @acc[8]=0, cf=0, of=0\n\tmulx\t8*0+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[1]\n\tadcx\t$hi, @acc[2]\n\n\tmulx\t8*1+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[2]\n\tadcx\t$hi, @acc[3]\n\n\tmulx\t8*2+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[3]\n\tadcx\t$hi, @acc[4]\n\n\tmulx\t8*3+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[4]\n\tadcx\t$hi, @acc[5]\n\n\tmulx\t8*4+128($a_ptr), $lo, $hi\n\tadox\t$lo, @acc[5]\n\tadcx\t$hi, @acc[6]\n\n\tmulx\t8*5+128($a_ptr), $lo, $hi\n\t mov\t@acc[0], %rdx\n\tadox\t$lo, @acc[6]\n\tadcx\t@acc[7], $hi\n\tadox\t$hi, @acc[7]\n\n\t################################# reduction\n\txor\t@acc[0], @acc[0]\t# acc[0]=0, cf=0, of=0\n\tmulx\t8*0+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[8]\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t8*4+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t8*5+128($n_ptr), $lo, $hi\n\t mov\t$b_next, %rdx\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\tadcx\t@acc[8], @acc[6]\n\tadox\t@acc[8], @acc[7]\n\tadcx\t@acc[8], @acc[7]\n___\n    push(@acc,shift(@acc));\n}\n$code.=<<___;\n\timulq\t8(%rsp), %rdx\n\tmov\t8*3(%rsp), $b_ptr\t# restore $r_ptr\n\n\t################################# last reduction\n\txor\t@acc[8], @acc[8]\t# @acc[8]=0, cf=0, of=0\n\tmulx\t8*0+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[0]\t\t# guaranteed to be zero\n\tadox\t$hi, @acc[1]\n\n\tmulx\t8*1+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[1]\n\tadox\t$hi, @acc[2]\n\n\tmulx\t8*2+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[2]\n\tadox\t$hi, @acc[3]\n\n\tmulx\t8*3+128($n_ptr), $lo, $hi\n\tadcx\t$lo, 
@acc[3]\n\tadox\t$hi, @acc[4]\n\n\tmulx\t8*4+128($n_ptr), $lo, $hi\n\tadcx\t$lo, @acc[4]\n\tadox\t$hi, @acc[5]\n\n\tmulx\t8*5+128($n_ptr), $lo, $hi\n\t mov\t@acc[1], %rdx\n\tadcx\t$lo, @acc[5]\n\tadox\t$hi, @acc[6]\n\tadc\t\\$0, @acc[6]\n\t mov\t@acc[4], @acc[8]\n\n\tmov\t@acc[1], 8*0($b_ptr)\n\tmov\t@acc[2], 8*1($b_ptr)\n\tmov\t@acc[3], 8*2($b_ptr)\n\t mov\t@acc[5], $lo\n\tmov\t@acc[4], 8*3($b_ptr)\n\tmov\t@acc[5], 8*4($b_ptr)\n\tmov\t@acc[6], 8*5($b_ptr)\n\t mov\t@acc[6], $hi\n\n\tret\t# __SGX_LVI_HARDENING_CLOBBER__=%rsi\n.cfi_endproc\n.size\t__mulx_mont_383_nonred,.-__mulx_mont_383_nonred\n___\n} } }\n{ my $frame = 4*8 +\t# place for argument off-load +\n\t      2*384/8 +\t# place for 2 384-bit temporary vectors\n\t      8;\t# align\nmy @acc = (@acc,\"%rax\",\"%rdx\",\"%rbx\",\"%rbp\");\n\n# omitting 3 reductions gives ~10% better performance in add-chains\n$code.=<<___;\n.globl\tsqrx_mont_382x\n.hidden\tsqrx_mont_382x\n.type\tsqrx_mont_382x,\\@function,4,\"unwind\"\n.align\t32\nsqrx_mont_382x:\n.cfi_startproc\nsqr_mont_382x\\$1:\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tsub\t\\$$frame, %rsp\n.cfi_adjust_cfa_offset\t$frame\n.cfi_end_prologue\n\n\tmov\t$n_ptr, 8*0(%rsp)\t# n0\n\tmov\t$b_org, $n_ptr\t\t# n_ptr\n\tmov\t$r_ptr, 8*2(%rsp)\n\tmov\t$a_ptr, 8*3(%rsp)\n\n\t#################################\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t8*0($a_ptr), @acc[0]\t# a->re\n\tmov\t8*1($a_ptr), @acc[1]\n\tmov\t8*2($a_ptr), @acc[2]\n\tmov\t8*3($a_ptr), @acc[3]\n\tmov\t8*4($a_ptr), @acc[4]\n\tmov\t8*5($a_ptr), @acc[5]\n\n\tmov\t@acc[0], @acc[6]\n\tadd\t8*6($a_ptr), @acc[0]\t# a->re + a->im\n\tmov\t@acc[1], @acc[7]\n\tadc\t8*7($a_ptr), @acc[1]\n\tmov\t@acc[2], @acc[8]\n\tadc\t8*8($a_ptr), @acc[2]\n\tmov\t@acc[3], @acc[9]\n\tadc\t8*9($a_ptr), @acc[3]\n\tmov\t@acc[4], @acc[10]\n\tadc\t8*10($a_ptr), 
@acc[4]\n\tmov\t@acc[5], @acc[11]\n\tadc\t8*11($a_ptr), @acc[5]\n\n\tsub\t8*6($a_ptr), @acc[6]\t# a->re - a->im\n\tsbb\t8*7($a_ptr), @acc[7]\n\tsbb\t8*8($a_ptr), @acc[8]\n\tsbb\t8*9($a_ptr), @acc[9]\n\tsbb\t8*10($a_ptr), @acc[10]\n\tsbb\t8*11($a_ptr), @acc[11]\n\tsbb\t$r_ptr, $r_ptr\t\t# borrow flag as mask\n\n\tmov\t@acc[0], 32+8*0(%rsp)\t# t0\n\tmov\t@acc[1], 32+8*1(%rsp)\n\tmov\t@acc[2], 32+8*2(%rsp)\n\tmov\t@acc[3], 32+8*3(%rsp)\n\tmov\t@acc[4], 32+8*4(%rsp)\n\tmov\t@acc[5], 32+8*5(%rsp)\n\n\tmov\t@acc[6], 32+8*6(%rsp)\t# t1\n\tmov\t@acc[7], 32+8*7(%rsp)\n\tmov\t@acc[8], 32+8*8(%rsp)\n\tmov\t@acc[9], 32+8*9(%rsp)\n\tmov\t@acc[10], 32+8*10(%rsp)\n\tmov\t@acc[11], 32+8*11(%rsp)\n\tmov\t$r_ptr,   32+8*12(%rsp)\n\n\t################################# mul_mont_384(ret->im, a->re, a->im, mod, n0);\n\t#mov\t8*3(%rsp), $a_ptr\t# a->re\n\tlea\t48($a_ptr), $b_ptr\t# a->im\n\n\tmov\t48($a_ptr), %rdx\n\tmov\t8*0($a_ptr), %r14\t# @acc[6]\n\tmov\t8*1($a_ptr), %r15\t# @acc[7]\n\tmov\t8*2($a_ptr), %rax\t# @acc[8]\n\tmov\t8*3($a_ptr), %r12\t# @acc[4]\n\tmov\t8*4($a_ptr), %rdi\t# $lo\n\tmov\t8*5($a_ptr), %rbp\t# $hi\n\tlea\t-128($a_ptr), $a_ptr\t# control u-op density\n\tlea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%r14, %r8, %r9\n\tcall\t__mulx_mont_383_nonred\n___\n{\nmy @acc = map(\"%r$_\",\"dx\",15,\"ax\",12,\"di\",\"bp\",\t# output from __mulx_mont_384\n                      8..11,13,14);\n$code.=<<___;\n\tadd\t@acc[0], @acc[0]\t# add with itself\n\tadc\t@acc[1], @acc[1]\n\tadc\t@acc[2], @acc[2]\n\tadc\t@acc[3], @acc[3]\n\tadc\t@acc[4], @acc[4]\n\tadc\t@acc[5], @acc[5]\n\n\tmov\t@acc[0],  8*6($b_ptr)\t# ret->im\n\tmov\t@acc[1],  8*7($b_ptr)\n\tmov\t@acc[2],  8*8($b_ptr)\n\tmov\t@acc[3],  8*9($b_ptr)\n\tmov\t@acc[4],  8*10($b_ptr)\n\tmov\t@acc[5],  8*11($b_ptr)\n___\n}\n$code.=<<___;\n\t################################# mul_mont_384(ret->re, t0, t1, mod, n0);\n\tlea\t32-128(%rsp), $a_ptr\t# t0 [+u-op density]\n\tlea\t32+8*6(%rsp), $b_ptr\t# 
t1\n\n\tmov\t32+8*6(%rsp), %rdx\t# t1[0]\n\tmov\t32+8*0(%rsp), %r14\t# @acc[6]\n\tmov\t32+8*1(%rsp), %r15\t# @acc[7]\n\tmov\t32+8*2(%rsp), %rax\t# @acc[8]\n\tmov\t32+8*3(%rsp), %r12\t# @acc[4]\n\tmov\t32+8*4(%rsp), %rdi\t# $lo\n\tmov\t32+8*5(%rsp), %rbp\t# $hi\n\t#lea\t-128($a_ptr), $a_ptr\t# control u-op density\n\t#lea\t-128($n_ptr), $n_ptr\t# control u-op density\n\n\tmulx\t%r14, %r8, %r9\n\tcall\t__mulx_mont_383_nonred\n___\n{\nmy @acc = map(\"%r$_\",\"dx\",15,\"ax\",12,\"di\",\"bp\",\t# output from __mulx_mont_384\n                      8..11,13,14);\n$code.=<<___;\n\tmov\t32+8*12(%rsp), @acc[11]\t# account for sign from a->re - a->im\n\tlea\t128($n_ptr), $n_ptr\n\tmov\t32+8*0(%rsp), @acc[6]\n\tand\t@acc[11], @acc[6]\n\tmov\t32+8*1(%rsp), @acc[7]\n\tand\t@acc[11], @acc[7]\n\tmov\t32+8*2(%rsp), @acc[8]\n\tand\t@acc[11], @acc[8]\n\tmov\t32+8*3(%rsp), @acc[9]\n\tand\t@acc[11], @acc[9]\n\tmov\t32+8*4(%rsp), @acc[10]\n\tand\t@acc[11], @acc[10]\n\tand\t32+8*5(%rsp), @acc[11]\n\n\tsub\t@acc[6], @acc[0]\n\tmov\t8*0($n_ptr), @acc[6]\n\tsbb\t@acc[7], @acc[1]\n\tmov\t8*1($n_ptr), @acc[7]\n\tsbb\t@acc[8], @acc[2]\n\tmov\t8*2($n_ptr), @acc[8]\n\tsbb\t@acc[9], @acc[3]\n\tmov\t8*3($n_ptr), @acc[9]\n\tsbb\t@acc[10], @acc[4]\n\tmov\t8*4($n_ptr), @acc[10]\n\tsbb\t@acc[11], @acc[5]\n\tsbb\t@acc[11], @acc[11]\n\n\tand\t@acc[11], @acc[6]\n\tand\t@acc[11], @acc[7]\n\tand\t@acc[11], @acc[8]\n\tand\t@acc[11], @acc[9]\n\tand\t@acc[11], @acc[10]\n\tand\t8*5($n_ptr), @acc[11]\n\n\tadd\t@acc[6], @acc[0]\n\tadc\t@acc[7], @acc[1]\n\tadc\t@acc[8], @acc[2]\n\tadc\t@acc[9], @acc[3]\n\tadc\t@acc[10], @acc[4]\n\tadc\t@acc[11], @acc[5]\n\n\tmov\t@acc[0],  8*0($b_ptr)\t# ret->re\n\tmov\t@acc[1],  8*1($b_ptr)\n\tmov\t@acc[2],  8*2($b_ptr)\n\tmov\t@acc[3],  8*3($b_ptr)\n\tmov\t@acc[4],  8*4($b_ptr)\n\tmov\t@acc[5],  8*5($b_ptr)\n___\n}\n$code.=<<___;\n\tlea\t$frame(%rsp), %r8\t# size 
optimization\n\tmov\t8*0(%r8),%r15\n.cfi_restore\t%r15\n\tmov\t8*1(%r8),%r14\n.cfi_restore\t%r14\n\tmov\t8*2(%r8),%r13\n.cfi_restore\t%r13\n\tmov\t8*3(%r8),%r12\n.cfi_restore\t%r12\n\tmov\t8*4(%r8),%rbx\n.cfi_restore\t%rbx\n\tmov\t8*5(%r8),%rbp\n.cfi_restore\t%rbp\n\tlea\t8*6(%r8),%rsp\n.cfi_adjust_cfa_offset\t-$frame-8*6\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\tsqrx_mont_382x,.-sqrx_mont_382x\n___\n}\n\nprint $code;\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/sha256-armv8.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# ====================================================================\n# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n# project.\n# ====================================================================\n#\n# sha256_block procedure for ARMv8.\n#\n# This module is stripped of scalar code paths, with rationale that all\n# known processors are NEON-capable.\n#\n# See original module at CRYPTOGAMS for further details.\n\n$flavour = shift;\n$output  = shift;\n\nif ($flavour && $flavour ne \"void\") {\n    $0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n    ( $xlate=\"${dir}arm-xlate.pl\" and -f $xlate ) or\n    ( $xlate=\"${dir}../../perlasm/arm-xlate.pl\" and -f $xlate) or\n    die \"can't locate arm-xlate.pl\";\n\n    open STDOUT,\"| \\\"$^X\\\" $xlate $flavour $output\";\n} else {\n    open STDOUT,\">$output\";\n}\n\n$BITS=256;\n$SZ=4;\n@Sigma0=( 2,13,22);\n@Sigma1=( 6,11,25);\n@sigma0=( 7,18, 
3);\n@sigma1=(17,19,10);\n$rounds=64;\n$reg_t=\"w\";\n$pre=\"blst_\";\n\n($ctx,$inp,$num,$Ktbl)=map(\"x$_\",(0..2,30));\n\n$code.=<<___;\n.comm\t__blst_platform_cap,4\n.text\n\n.align\t6\n.type\t.LK$BITS,%object\n.LK$BITS:\n\t.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n\t.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n\t.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n\t.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n\t.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n\t.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n\t.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n\t.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n\t.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n\t.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n\t.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n\t.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n\t.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n\t.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n\t.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n\t.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\t.long\t0\t//terminator\n.size\t.LK$BITS,.-.LK$BITS\n.asciz\t\"SHA$BITS block transform for ARMv8, CRYPTOGAMS by \\@dot-asm\"\n.align\t2\n___\n\nif ($SZ==4) {\nmy $Ktbl=\"x3\";\n\nmy ($ABCD,$EFGH,$abcd)=map(\"v$_.16b\",(0..2));\nmy @MSG=map(\"v$_.16b\",(4..7));\nmy ($W0,$W1)=(\"v16.4s\",\"v17.4s\");\nmy 
($ABCD_SAVE,$EFGH_SAVE)=(\"v18.16b\",\"v19.16b\");\n\n$code.=<<___;\n.globl\t${pre}sha256_block_armv8\n.hidden\t${pre}sha256_block_armv8\n.type\t${pre}sha256_block_armv8,%function\n.align\t6\n${pre}sha256_block_armv8:\n\thint\t\t#34\n.Lv8_entry:\n\tstp\t\tc29,c30,[csp,#-2*__SIZEOF_POINTER__]!\n\tadd\t\tc29,csp,#0\n\n\tld1.32\t\t{$ABCD,$EFGH},[$ctx]\n\tadr\t\t$Ktbl,.LK256\n\n.Loop_hw:\n\tld1\t\t{@MSG[0]-@MSG[3]},[$inp],#64\n\tsub\t\t$num,$num,#1\n\tld1.32\t\t{$W0},[$Ktbl],#16\n\trev32\t\t@MSG[0],@MSG[0]\n\trev32\t\t@MSG[1],@MSG[1]\n\trev32\t\t@MSG[2],@MSG[2]\n\trev32\t\t@MSG[3],@MSG[3]\n\torr\t\t$ABCD_SAVE,$ABCD,$ABCD\t\t// offload\n\torr\t\t$EFGH_SAVE,$EFGH,$EFGH\n___\nfor($i=0;$i<12;$i++) {\n$code.=<<___;\n\tld1.32\t\t{$W1},[$Ktbl],#16\n\tadd.i32\t\t$W0,$W0,@MSG[0]\n\tsha256su0\t@MSG[0],@MSG[1]\n\torr\t\t$abcd,$ABCD,$ABCD\n\tsha256h\t\t$ABCD,$EFGH,$W0\n\tsha256h2\t$EFGH,$abcd,$W0\n\tsha256su1\t@MSG[0],@MSG[2],@MSG[3]\n___\n\t($W0,$W1)=($W1,$W0);\tpush(@MSG,shift(@MSG));\n}\n$code.=<<___;\n\tld1.32\t\t{$W1},[$Ktbl],#16\n\tadd.i32\t\t$W0,$W0,@MSG[0]\n\torr\t\t$abcd,$ABCD,$ABCD\n\tsha256h\t\t$ABCD,$EFGH,$W0\n\tsha256h2\t$EFGH,$abcd,$W0\n\n\tld1.32\t\t{$W0},[$Ktbl],#16\n\tadd.i32\t\t$W1,$W1,@MSG[1]\n\torr\t\t$abcd,$ABCD,$ABCD\n\tsha256h\t\t$ABCD,$EFGH,$W1\n\tsha256h2\t$EFGH,$abcd,$W1\n\n\tld1.32\t\t{$W1},[$Ktbl]\n\tadd.i32\t\t$W0,$W0,@MSG[2]\n\tsub\t\t$Ktbl,$Ktbl,#$rounds*$SZ-16\t// rewind\n\torr\t\t$abcd,$ABCD,$ABCD\n\tsha256h\t\t$ABCD,$EFGH,$W0\n\tsha256h2\t$EFGH,$abcd,$W0\n\n\tadd.i32\t\t$W1,$W1,@MSG[3]\n\torr\t\t$abcd,$ABCD,$ABCD\n\tsha256h\t\t$ABCD,$EFGH,$W1\n\tsha256h2\t$EFGH,$abcd,$W1\n\n\tadd.i32\t\t$ABCD,$ABCD,$ABCD_SAVE\n\tadd.i32\t\t$EFGH,$EFGH,$EFGH_SAVE\n\n\tcbnz\t\t$num,.Loop_hw\n\n\tst1.32\t\t{$ABCD,$EFGH},[$ctx]\n\n\tldr\t\tc29,[csp],#2*__SIZEOF_POINTER__\n\tret\n.size\t${pre}sha256_block_armv8,.-${pre}sha256_block_armv8\n___\n}\n\nif ($SZ==4) {\t######################################### NEON stuff #\n# You'll surely note a lot of similarities with 
sha256-armv4 module,\n# and of course it's not a coincidence. sha256-armv4 was used as\n# initial template, but was adapted for ARMv8 instruction set and\n# extensively re-tuned for all-round performance.\n\nmy @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map(\"w$_\",(3..10));\nmy ($t0,$t1,$t2,$t3,$t4) = map(\"w$_\",(11..15));\nmy $Ktbl=\"x16\";\nmy $Xfer=\"x17\";\nmy @X = map(\"q$_\",(0..3));\nmy ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7) = map(\"q$_\",(4..7,16..19));\nmy $j=0;\n\nsub AUTOLOAD()          # thunk [simplified] x86-style perlasm\n{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\\./;\n  my $arg = pop;\n    $arg = \"#$arg\" if ($arg*1 eq $arg);\n    $code .= \"\\t$opcode\\t\".join(',',@_,$arg).\"\\n\";\n}\n\nsub Dscalar { shift =~ m|[qv]([0-9]+)|?\"d$1\":\"\"; }\nsub Dlo     { shift =~ m|[qv]([0-9]+)|?\"v$1.d[0]\":\"\"; }\nsub Dhi     { shift =~ m|[qv]([0-9]+)|?\"v$1.d[1]\":\"\"; }\n\nsub Xupdate()\n{ use integer;\n  my $body = shift;\n  my @insns = (&$body,&$body,&$body,&$body);\n  my ($a,$b,$c,$d,$e,$f,$g,$h);\n\n\t&ext_8\t\t($T0,@X[0],@X[1],4);\t# X[1..4]\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ext_8\t\t($T3,@X[2],@X[3],4);\t# X[9..12]\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&mov\t\t(&Dscalar($T7),&Dhi(@X[3]));\t# X[14..15]\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ushr_32\t($T2,$T0,$sigma0[0]);\n\t eval(shift(@insns));\n\t&ushr_32\t($T1,$T0,$sigma0[2]);\n\t eval(shift(@insns));\n\t&add_32 \t(@X[0],@X[0],$T3);\t# X[0..3] += X[9..12]\n\t eval(shift(@insns));\n\t&sli_32\t\t($T2,$T0,32-$sigma0[0]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ushr_32\t($T3,$T0,$sigma0[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&eor_8\t\t($T1,$T1,$T2);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&sli_32\t\t($T3,$T0,32-$sigma0[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &ushr_32\t($T4,$T7,$sigma1[0]);\n\t eval(shift(@insns));\n\t 
eval(shift(@insns));\n\t&eor_8\t\t($T1,$T1,$T3);\t\t# sigma0(X[1..4])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &sli_32\t($T4,$T7,32-$sigma1[0]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &ushr_32\t($T5,$T7,$sigma1[2]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &ushr_32\t($T3,$T7,$sigma1[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&add_32\t\t(@X[0],@X[0],$T1);\t# X[0..3] += sigma0(X[1..4])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &sli_u32\t($T3,$T7,32-$sigma1[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &eor_8\t($T5,$T5,$T4);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &eor_8\t($T5,$T5,$T3);\t\t# sigma1(X[14..15])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&add_32\t\t(@X[0],@X[0],$T5);\t# X[0..1] += sigma1(X[14..15])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &ushr_32\t($T6,@X[0],$sigma1[0]);\n\t eval(shift(@insns));\n\t  &ushr_32\t($T7,@X[0],$sigma1[2]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &sli_32\t($T6,@X[0],32-$sigma1[0]);\n\t eval(shift(@insns));\n\t  &ushr_32\t($T5,@X[0],$sigma1[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &eor_8\t($T7,$T7,$T6);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t  &sli_32\t($T5,@X[0],32-$sigma1[1]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ld1_32\t\t(\"{$T0}\",\"[$Ktbl], #16\");\n\t eval(shift(@insns));\n\t  &eor_8\t($T7,$T7,$T5);\t\t# sigma1(X[16..17])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&eor_8\t\t($T5,$T5,$T5);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&mov\t\t(&Dhi($T5), &Dlo($T7));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&add_32\t\t(@X[0],@X[0],$T5);\t# X[2..3] += sigma1(X[16..17])\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&add_32\t\t($T0,$T0,@X[0]);\n\t while($#insns>=1) { 
eval(shift(@insns)); }\n\t&st1_32\t\t(\"{$T0}\",\"[$Xfer], #16\");\n\t eval(shift(@insns));\n\n\tpush(@X,shift(@X));\t\t# \"rotate\" X[]\n}\n\nsub Xpreload()\n{ use integer;\n  my $body = shift;\n  my @insns = (&$body,&$body,&$body,&$body);\n  my ($a,$b,$c,$d,$e,$f,$g,$h);\n\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ld1_8\t\t(\"{@X[0]}\",\"[$inp],#16\");\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&ld1_32\t\t(\"{$T0}\",\"[$Ktbl],#16\");\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&rev32\t\t(@X[0],@X[0]);\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t eval(shift(@insns));\n\t&add_32\t\t($T0,$T0,@X[0]);\n\t foreach (@insns) { eval; }\t# remaining instructions\n\t&st1_32\t\t(\"{$T0}\",\"[$Xfer], #16\");\n\n\tpush(@X,shift(@X));\t\t# \"rotate\" X[]\n}\n\nsub body_00_15 () {\n\t(\n\t'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.\n\t'&add\t($h,$h,$t1)',\t\t\t# h+=X[i]+K[i]\n\t'&add\t($a,$a,$t4);'.\t\t\t# h+=Sigma0(a) from the past\n\t'&and\t($t1,$f,$e)',\n\t'&bic\t($t4,$g,$e)',\n\t'&eor\t($t0,$e,$e,\"ror#\".($Sigma1[1]-$Sigma1[0]))',\n\t'&add\t($a,$a,$t2)',\t\t\t# h+=Maj(a,b,c) from the past\n\t'&orr\t($t1,$t1,$t4)',\t\t\t# Ch(e,f,g)\n\t'&eor\t($t0,$t0,$e,\"ror#\".($Sigma1[2]-$Sigma1[0]))',\t# Sigma1(e)\n\t'&eor\t($t4,$a,$a,\"ror#\".($Sigma0[1]-$Sigma0[0]))',\n\t'&add\t($h,$h,$t1)',\t\t\t# h+=Ch(e,f,g)\n\t'&ror\t($t0,$t0,\"#$Sigma1[0]\")',\n\t'&eor\t($t2,$a,$b)',\t\t\t# a^b, b^c in next round\n\t'&eor\t($t4,$t4,$a,\"ror#\".($Sigma0[2]-$Sigma0[0]))',\t# Sigma0(a)\n\t'&add\t($h,$h,$t0)',\t\t\t# h+=Sigma1(e)\n\t'&ldr\t($t1,sprintf \"[sp,#%d]\",4*(($j+1)&15))\tif (($j&15)!=15);'.\n\t'&ldr\t($t1,\"[$Ktbl]\")\t\t\t\tif ($j==15);'.\n\t'&and\t($t3,$t3,$t2)',\t\t\t# (b^c)&=(a^b)\n\t'&ror\t($t4,$t4,\"#$Sigma0[0]\")',\n\t'&add\t($d,$d,$h)',\t\t\t# d+=h\n\t'&eor\t($t3,$t3,$b)',\t\t\t# Maj(a,b,c)\n\t'$j++;\tunshift(@V,pop(@V)); 
($t2,$t3)=($t3,$t2);'\n\t)\n}\n\n$code.=<<___;\n.globl\t${pre}sha256_block_data_order\n.hidden\t${pre}sha256_block_data_order\n.type\t${pre}sha256_block_data_order,%function\n.align\t4\n${pre}sha256_block_data_order:\n\thint\t#34\n\tadrp\tc16,__blst_platform_cap\n\tldr\tw16,[c16,#:lo12:__blst_platform_cap]\n\ttst\tw16,#1\n\tb.ne\t.Lv8_entry\n\n\tstp\tc29, c30, [csp, #-2*__SIZEOF_POINTER__]!\n\tmov\tc29, csp\n\tsub\tcsp,csp,#16*4\n\n\tadr\t$Ktbl,.LK256\n\tadd\t$num,$inp,$num,lsl#6\t// len to point at the end of inp\n\n\tld1.8\t{@X[0]},[$inp], #16\n\tld1.8\t{@X[1]},[$inp], #16\n\tld1.8\t{@X[2]},[$inp], #16\n\tld1.8\t{@X[3]},[$inp], #16\n\tld1.32\t{$T0},[$Ktbl], #16\n\tld1.32\t{$T1},[$Ktbl], #16\n\tld1.32\t{$T2},[$Ktbl], #16\n\tld1.32\t{$T3},[$Ktbl], #16\n\trev32\t@X[0],@X[0]\t\t// yes, even on\n\trev32\t@X[1],@X[1]\t\t// big-endian\n\trev32\t@X[2],@X[2]\n\trev32\t@X[3],@X[3]\n\tcmov\t$Xfer,sp\n\tadd.32\t$T0,$T0,@X[0]\n\tadd.32\t$T1,$T1,@X[1]\n\tadd.32\t$T2,$T2,@X[2]\n\tst1.32\t{$T0-$T1},[$Xfer], #32\n\tadd.32\t$T3,$T3,@X[3]\n\tst1.32\t{$T2-$T3},[$Xfer]\n\tcsub\t$Xfer,$Xfer,#32\n\n\tldp\t$A,$B,[$ctx]\n\tldp\t$C,$D,[$ctx,#8]\n\tldp\t$E,$F,[$ctx,#16]\n\tldp\t$G,$H,[$ctx,#24]\n\tldr\t$t1,[sp,#0]\n\tmov\t$t2,wzr\n\teor\t$t3,$B,$C\n\tmov\t$t4,wzr\n\tb\t.L_00_48\n\n.align\t4\n.L_00_48:\n___\n\t&Xupdate(\\&body_00_15);\n\t&Xupdate(\\&body_00_15);\n\t&Xupdate(\\&body_00_15);\n\t&Xupdate(\\&body_00_15);\n$code.=<<___;\n\tcmp\t$t1,#0\t\t\t\t// check for K256 terminator\n\tldr\t$t1,[sp,#0]\n\tcsub\t$Xfer,$Xfer,#64\n\tbne\t.L_00_48\n\n\tcsub\t$Ktbl,$Ktbl,#256\t\t// rewind $Ktbl\n\tcmp\t$inp,$num\n\tmov\t$Xfer, #-64\n\tcsel\t$Xfer, $Xfer, xzr, eq\n\tcadd\t$inp,$inp,$Xfer\t\t\t// avoid SEGV\n\tcmov\t$Xfer,sp\n___\n\t&Xpreload(\\&body_00_15);\n\t&Xpreload(\\&body_00_15);\n\t&Xpreload(\\&body_00_15);\n\t&Xpreload(\\&body_00_15);\n$code.=<<___;\n\tadd\t$A,$A,$t4\t\t\t// h+=Sigma0(a) from the past\n\tldp\t$t0,$t1,[$ctx,#0]\n\tadd\t$A,$A,$t2\t\t\t// h+=Maj(a,b,c) from the 
past\n\tldp\t$t2,$t3,[$ctx,#8]\n\tadd\t$A,$A,$t0\t\t\t// accumulate\n\tadd\t$B,$B,$t1\n\tldp\t$t0,$t1,[$ctx,#16]\n\tadd\t$C,$C,$t2\n\tadd\t$D,$D,$t3\n\tldp\t$t2,$t3,[$ctx,#24]\n\tadd\t$E,$E,$t0\n\tadd\t$F,$F,$t1\n\t ldr\t$t1,[sp,#0]\n\tstp\t$A,$B,[$ctx,#0]\n\tadd\t$G,$G,$t2\n\t mov\t$t2,wzr\n\tstp\t$C,$D,[$ctx,#8]\n\tadd\t$H,$H,$t3\n\tstp\t$E,$F,[$ctx,#16]\n\t eor\t$t3,$B,$C\n\tstp\t$G,$H,[$ctx,#24]\n\t mov\t$t4,wzr\n\t cmov\t$Xfer,sp\n\tb.ne\t.L_00_48\n\n\tldr\tc29,[c29]\n\tadd\tcsp,csp,#16*4+2*__SIZEOF_POINTER__\n\tret\n.size\t${pre}sha256_block_data_order,.-${pre}sha256_block_data_order\n___\n}\n\n{\nmy ($out,$inp,$len) = map(\"x$_\",(0..2));\n\n$code.=<<___;\n.globl\t${pre}sha256_emit\n.hidden\t${pre}sha256_emit\n.type\t${pre}sha256_emit,%function\n.align\t4\n${pre}sha256_emit:\n\thint\t#34\n\tldp\tx4,x5,[$inp]\n\tldp\tx6,x7,[$inp,#16]\n#ifndef\t__AARCH64EB__\n\trev\tx4,x4\n\trev\tx5,x5\n\trev\tx6,x6\n\trev\tx7,x7\n#endif\n\tstr\tw4,[$out,#4]\n\tlsr\tx4,x4,#32\n\tstr\tw5,[$out,#12]\n\tlsr\tx5,x5,#32\n\tstr\tw6,[$out,#20]\n\tlsr\tx6,x6,#32\n\tstr\tw7,[$out,#28]\n\tlsr\tx7,x7,#32\n\tstr\tw4,[$out,#0]\n\tstr\tw5,[$out,#8]\n\tstr\tw6,[$out,#16]\n\tstr\tw7,[$out,#24]\n\tret\n.size\t${pre}sha256_emit,.-${pre}sha256_emit\n\n.globl\t${pre}sha256_bcopy\n.hidden\t${pre}sha256_bcopy\n.type\t${pre}sha256_bcopy,%function\n.align\t4\n${pre}sha256_bcopy:\n\thint\t#34\n.Loop_bcopy:\n\tldrb\tw3,[$inp],#1\n\tsub\t$len,$len,#1\n\tstrb\tw3,[$out],#1\n\tcbnz\t$len,.Loop_bcopy\n\tret\n.size\t${pre}sha256_bcopy,.-${pre}sha256_bcopy\n\n.globl\t${pre}sha256_hcopy\n.hidden\t${pre}sha256_hcopy\n.type\t${pre}sha256_hcopy,%function\n.align\t4\n${pre}sha256_hcopy:\n\thint\t#34\n\tldp\tx4,x5,[$inp]\n\tldp\tx6,x7,[$inp,#16]\n\tstp\tx4,x5,[$out]\n\tstp\tx6,x7,[$out,#16]\n\tret\n.size\t${pre}sha256_hcopy,.-${pre}sha256_hcopy\n___\n}\n\n{   my  %opcode = (\n\t\"sha256h\"\t=> 0x5e004000,\t\"sha256h2\"\t=> 0x5e005000,\n\t\"sha256su0\"\t=> 0x5e282800,\t\"sha256su1\"\t=> 0x5e006000\t);\n\n    sub 
unsha256 {\n\tmy ($mnemonic,$arg)=@_;\n\n\t$arg =~ m/[qv]([0-9]+)[^,]*,\\s*[qv]([0-9]+)[^,]*(?:,\\s*[qv]([0-9]+))?/o\n\t&&\n\tsprintf \".inst\\t0x%08x\\t//%s %s\",\n\t\t\t$opcode{$mnemonic}|$1|($2<<5)|($3<<16),\n\t\t\t$mnemonic,$arg;\n    }\n}\n\nopen SELF,$0;\nwhile(<SELF>) {\n        next if (/^#!/);\n        last if (!s/^#/\\/\\// and !/^$/);\n        print;\n}\nclose SELF;\n\nforeach(split(\"\\n\",$code)) {\n\n\ts/\\`([^\\`]*)\\`/eval($1)/ge;\n\n\ts/\\b(sha512\\w+)\\s+([qv].*)/unsha512($1,$2)/ge\tor\n\ts/\\b(sha256\\w+)\\s+([qv].*)/unsha256($1,$2)/ge;\n\n\ts/\\bq([0-9]+)\\b/v$1.16b/g;\t\t# old->new registers\n\n\ts/\\.[ui]?8(\\s)/$1/;\n\ts/\\.\\w?64\\b//\t\tand s/\\.16b/\\.2d/g\tor\n\ts/\\.\\w?32\\b//\t\tand s/\\.16b/\\.4s/g;\n\tm/\\bext\\b/\t\tand s/\\.2d/\\.16b/g\tor\n\tm/(ld|st)1[^\\[]+\\[0\\]/\tand s/\\.4s/\\.s/g;\n\n\tprint $_,\"\\n\";\n}\n\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/sha256-portable-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# ====================================================================\n# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n# project.\n# ====================================================================\n#\n# sha256_block procedure for x86_64.\n#\n# Scalar-only version with minor twist minimizing 'lea' instructions.\n\n$flavour = shift;\n$output  = pop;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$pre=\"blst_\";\n$func=\"${pre}sha256_block_data_order\";\n$TABLE=\"K256\";\n$SZ=4;\n@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=(\"%eax\",\"%ebx\",\"%ecx\",\"%edx\",\n\t\t\t\t\"%r8d\",\"%r9d\",\"%r10d\",\"%r11d\");\n($T1,$a0,$a1,$a2,$a3)=(\"%r12d\",\"%r13d\",\"%r14d\",\"%r15d\",\"%edi\");\n@Sigma0=( 2,13,22);\n@Sigma1=( 6,11,25);\n@sigma0=( 7,18, 3);\n@sigma1=(17,19,10);\n$rounds=64;\n\n$ctx=\"%rdi\";\t# 1st arg, zapped by $a3\n$inp=\"%rsi\";\t# 2nd arg\n$Tbl=\"%rbp\";\n\n$_ctx=\"16*$SZ+0*8(%rsp)\";\n$_inp=\"16*$SZ+1*8(%rsp)\";\n$_end=\"16*$SZ+2*8(%rsp)\";\n$framesz=\"16*$SZ+3*8\";\n\nsub ROUND_00_15()\n{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;\n  my $STRIDE=$SZ;\n  #   $STRIDE += 16 if ($i%(16/$SZ)==(16/$SZ-1));\n\n$code.=<<___;\n\tror\t\\$`$Sigma1[2]-$Sigma1[1]`,$a0\n\tmov\t$f,$a2\n\n\txor\t$e,$a0\n\tror\t\\$`$Sigma0[2]-$Sigma0[1]`,$a1\n\txor\t$g,$a2\t\t\t# f^g\n\n\tmov\t$T1,`$SZ*($i&0xf)`(%rsp)\n\txor\t$a,$a1\n\tand\t$e,$a2\t\t\t# 
(f^g)&e\n\n\tror\t\\$`$Sigma1[1]-$Sigma1[0]`,$a0\n\tadd\t$h,$T1\t\t\t# T1+=h\n\txor\t$g,$a2\t\t\t# Ch(e,f,g)=((f^g)&e)^g\n\n\tror\t\\$`$Sigma0[1]-$Sigma0[0]`,$a1\n\txor\t$e,$a0\n\tadd\t$a2,$T1\t\t\t# T1+=Ch(e,f,g)\n\n\tmov\t$a,$a2\n\tadd\t`$SZ*$i`($Tbl),$T1\t# T1+=K[round]\n\txor\t$a,$a1\n\n\txor\t$b,$a2\t\t\t# a^b, b^c in next round\n\tror\t\\$$Sigma1[0],$a0\t# Sigma1(e)\n\tmov\t$b,$h\n\n\tand\t$a2,$a3\n\tror\t\\$$Sigma0[0],$a1\t# Sigma0(a)\n\tadd\t$a0,$T1\t\t\t# T1+=Sigma1(e)\n\n\txor\t$a3,$h\t\t\t# h=Maj(a,b,c)=Ch(a^b,c,b)\n\tadd\t$T1,$d\t\t\t# d+=T1\n\tadd\t$T1,$h\t\t\t# h+=T1\n___\n$code.=<<___ if ($i==31);\n\tlea\t`16*$SZ`($Tbl),$Tbl\t# round+=16\n___\n$code.=<<___ if ($i<15);\n\tadd\t$a1,$h\t\t\t# h+=Sigma0(a)\n___\n\t($a2,$a3) = ($a3,$a2);\n}\n\nsub ROUND_16_XX()\n{ my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;\n\n$code.=<<___;\n\tmov\t`$SZ*(($i+1)&0xf)`(%rsp),$a0\n\tmov\t`$SZ*(($i+14)&0xf)`(%rsp),$a2\n\n\tmov\t$a0,$T1\n\tror\t\\$`$sigma0[1]-$sigma0[0]`,$a0\n\tadd\t$a1,$a\t\t\t# modulo-scheduled h+=Sigma0(a)\n\tmov\t$a2,$a1\n\tror\t\\$`$sigma1[1]-$sigma1[0]`,$a2\n\n\txor\t$T1,$a0\n\tshr\t\\$$sigma0[2],$T1\n\tror\t\\$$sigma0[0],$a0\n\txor\t$a1,$a2\n\tshr\t\\$$sigma1[2],$a1\n\n\tror\t\\$$sigma1[0],$a2\n\txor\t$a0,$T1\t\t\t# sigma0(X[(i+1)&0xf])\n\txor\t$a1,$a2\t\t\t# sigma1(X[(i+14)&0xf])\n\tadd\t`$SZ*(($i+9)&0xf)`(%rsp),$T1\n\n\tadd\t`$SZ*($i&0xf)`(%rsp),$T1\n\tmov\t$e,$a0\n\tadd\t$a2,$T1\n\tmov\t$a,$a1\n___\n\t&ROUND_00_15(@_);\n}\n\n$code=<<___;\n.comm\t__blst_platform_cap,4\n.text\n\n.globl\t$func\n.type\t$func,\\@function,3,\"unwind\"\n.align\t16\n$func:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tmov\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifdef __BLST_PORTABLE__\n\ttestl\t\\$2,__blst_platform_cap(%rip)\n\tjnz\t.L${func}\\$2\n#endif\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tshl\t\\$4,%rdx\t\t# 
num*16\n\tsub\t\\$$framesz,%rsp\n.cfi_alloca\t$framesz\n.cfi_def_cfa\t%rsp\n.cfi_end_prologue\n\tlea\t($inp,%rdx,$SZ),%rdx\t# inp+num*16*$SZ\n\tmov\t$ctx,$_ctx\t\t# save ctx, 1st arg\n\tmov\t$inp,$_inp\t\t# save inp, 2nd arg\n\tmov\t%rdx,$_end\t\t# save end pointer, \"3rd\" arg\n\n\tmov\t$SZ*0($ctx),$A\n\tmov\t$SZ*1($ctx),$B\n\tmov\t$SZ*2($ctx),$C\n\tmov\t$SZ*3($ctx),$D\n\tmov\t$SZ*4($ctx),$E\n\tmov\t$SZ*5($ctx),$F\n\tmov\t$SZ*6($ctx),$G\n\tmov\t$SZ*7($ctx),$H\n\tjmp\t.Lloop\n\n.align\t16\n.Lloop:\n\tmov\t$B,$a3\n\tlea\t$TABLE(%rip),$Tbl\n\txor\t$C,$a3\t\t\t# magic\n___\n\tfor($i=0;$i<16;$i++) {\n\t\t$code.=\"\tmov\t$SZ*$i($inp),$T1\\n\";\n\t\t$code.=\"\tmov\t@ROT[4],$a0\\n\";\n\t\t$code.=\"\tmov\t@ROT[0],$a1\\n\";\n\t\t$code.=\"\tbswap\t$T1\\n\";\n\t\t&ROUND_00_15($i,@ROT);\n\t\tunshift(@ROT,pop(@ROT));\n\t}\n$code.=<<___;\n\tjmp\t.Lrounds_16_xx\n.align\t16\n.Lrounds_16_xx:\n___\n\tfor(;$i<32;$i++) {\n\t\t&ROUND_16_XX($i,@ROT);\n\t\tunshift(@ROT,pop(@ROT));\n\t}\n\n$code.=<<___;\n\tcmpb\t\\$0x19,`$SZ-1`($Tbl)\n\tjnz\t.Lrounds_16_xx\n\n\tmov\t$_ctx,$ctx\n\tadd\t$a1,$A\t\t\t# modulo-scheduled h+=Sigma0(a)\n\tlea\t16*$SZ($inp),$inp\n\n\tadd\t$SZ*0($ctx),$A\n\tadd\t$SZ*1($ctx),$B\n\tadd\t$SZ*2($ctx),$C\n\tadd\t$SZ*3($ctx),$D\n\tadd\t$SZ*4($ctx),$E\n\tadd\t$SZ*5($ctx),$F\n\tadd\t$SZ*6($ctx),$G\n\tadd\t$SZ*7($ctx),$H\n\n\tcmp\t$_end,$inp\n\n\tmov\t$A,$SZ*0($ctx)\n\tmov\t$B,$SZ*1($ctx)\n\tmov\t$C,$SZ*2($ctx)\n\tmov\t$D,$SZ*3($ctx)\n\tmov\t$E,$SZ*4($ctx)\n\tmov\t$F,$SZ*5($ctx)\n\tmov\t$G,$SZ*6($ctx)\n\tmov\t$H,$SZ*7($ctx)\n\tjb\t.Lloop\n\n\tlea\t$framesz+6*8(%rsp),%r11\n.cfi_def_cfa\t%r11,8\n\tmov\t$framesz(%rsp),%r15\n\tmov\t-40(%r11),%r14\n\tmov\t-32(%r11),%r13\n\tmov\t-24(%r11),%r12\n\tmov\t-16(%r11),%rbx\n\tmov\t-8(%r11),%rbp\n.cfi_epilogue\n\tlea\t(%r11),%rsp\n\tret\n.cfi_endproc\n.size\t$func,.-$func\n\n#ifndef 
__BLST_PORTABLE__\n.section\t.rodata\n.align\t64\n.type\t$TABLE,\\@object\n$TABLE:\n\t.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n\t.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n\t.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n\t.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n\t.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n\t.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n\t.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n\t.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n\t.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n\t.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n\t.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n\t.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n\t.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n\t.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n\t.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n\t.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n\t.asciz\t\"SHA256 block transform for x86_64, CRYPTOGAMS by \\@dot-asm\"\n___\n{\nmy ($out,$inp,$len) = $win64 ? 
(\"%rcx\",\"%rdx\",\"%r8\") :  # Win64 order\n                               (\"%rdi\",\"%rsi\",\"%rdx\");  # Unix order\n$code.=<<___;\n.globl\t${pre}sha256_emit\n.hidden\t${pre}sha256_emit\n.type\t${pre}sha256_emit,\\@abi-omnipotent\n.align\t16\n${pre}sha256_emit:\n\tmov\t0($inp), %r8\n\tmov\t8($inp), %r9\n\tmov\t16($inp), %r10\n\tbswap\t%r8\n\tmov\t24($inp), %r11\n\tbswap\t%r9\n\tmov\t%r8d, 4($out)\n\tbswap\t%r10\n\tmov\t%r9d, 12($out)\n\tbswap\t%r11\n\tmov\t%r10d, 20($out)\n\tshr\t\\$32, %r8\n\tmov\t%r11d, 28($out)\n\tshr\t\\$32, %r9\n\tmov\t%r8d, 0($out)\n\tshr\t\\$32, %r10\n\tmov\t%r9d, 8($out)\n\tshr\t\\$32, %r11\n\tmov\t%r10d, 16($out)\n\tmov\t%r11d, 24($out)\n\tret\n.size\t${pre}sha256_emit,.-${pre}sha256_emit\n\n.globl\t${pre}sha256_bcopy\n.hidden\t${pre}sha256_bcopy\n.type\t${pre}sha256_bcopy,\\@abi-omnipotent\n.align\t16\n${pre}sha256_bcopy:\n\tsub\t$inp, $out\n.Loop_bcopy:\n\tmovzb\t($inp), %eax\n\tlea\t1($inp), $inp\n\tmov\t%al, -1($out,$inp)\n\tdec\t$len\n\tjnz\t.Loop_bcopy\n\tret\n.size\t${pre}sha256_bcopy,.-${pre}sha256_bcopy\n\n.globl\t${pre}sha256_hcopy\n.hidden\t${pre}sha256_hcopy\n.type\t${pre}sha256_hcopy,\\@abi-omnipotent\n.align\t16\n${pre}sha256_hcopy:\n\tmov\t0($inp), %r8\n\tmov\t8($inp), %r9\n\tmov\t16($inp), %r10\n\tmov\t24($inp), %r11\n\tmov\t%r8, 0($out)\n\tmov\t%r9, 8($out)\n\tmov\t%r10, 16($out)\n\tmov\t%r11, 24($out)\n\tret\n.size\t${pre}sha256_hcopy,.-${pre}sha256_hcopy\n#endif\n___\n}\n\nforeach (split(\"\\n\",$code)) {\n\ts/\\`([^\\`]*)\\`/eval $1/geo;\n\tprint $_,\"\\n\";\n}\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/sha256-x86_64.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# ====================================================================\n# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL\n# project.\n# ====================================================================\n#\n# sha256_block procedure for x86_64.\n#\n# This module is stripped of AVX and even scalar code paths, with\n# rationale that\n#\n# a) AVX1 is [justifiably] faster than SSSE3 code path only on *one*\n#    processor, venerable Sandy Bridge;\n# b) AVX2 incurs costly power transitions, which would be justifiable\n#    if AVX2 code was executing most of the time, which is not the\n#    case in the context;\n# c) all contemporary processors support SSSE3, so that nobody would\n#    actually use scalar code path anyway;\n#\n# See original module at CRYPTOGAMS for further details.\n\n$flavour = shift;\n$output  = pop;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\n$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\\.asm$/);\n\n$0 =~ m/(.*[\\/\\\\])[^\\/\\\\]+$/; $dir=$1;\n( $xlate=\"${dir}x86_64-xlate.pl\" and -f $xlate ) or\n( $xlate=\"${dir}../../perlasm/x86_64-xlate.pl\" and -f $xlate) or\ndie \"can't locate x86_64-xlate.pl\";\n\nopen STDOUT,\"| \\\"$^X\\\" \\\"$xlate\\\" $flavour \\\"$output\\\"\"\n    or die \"can't call $xlate: $!\";\n\n$pre=\"blst_\";\n$func=\"${pre}sha256_block_data_order\";\n$TABLE=\"K256\";\n$SZ=4;\n@ROT=($A,$B,$C,$D,$E,$F,$G,$H)=(\"%eax\",\"%ebx\",\"%ecx\",\"%edx\",\n\t\t\t\t\"%r8d\",\"%r9d\",\"%r10d\",\"%r11d\");\n($T1,$a0,$a1,$a2,$a3)=(\"%r12d\",\"%r13d\",\"%r14d\",\"%r15d\",\"%edi\");\n@Sigma0=( 2,13,22);\n@Sigma1=( 6,11,25);\n@sigma0=( 7,18, 3);\n@sigma1=(17,19,10);\n$rounds=64;\n\n$ctx=\"%rdi\";\t# 1st arg, zapped by $a3\n$inp=\"%rsi\";\t# 2nd 
arg\n$Tbl=\"%rbp\";\n\n$_ctx=\"16*$SZ+0*8(%rsp)\";\n$_inp=\"16*$SZ+1*8(%rsp)\";\n$_end=\"16*$SZ+2*8(%rsp)\";\n$framesz=\"16*$SZ+3*8\";\n\n$code=<<___;\n.comm\t__blst_platform_cap,4\n\n.section\t.rodata\n.align\t64\n.type\t$TABLE,\\@object\n$TABLE:\n\t.long\t0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5\n\t.long\t0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5\n\t.long\t0xd807aa98,0x12835b01,0x243185be,0x550c7dc3\n\t.long\t0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174\n\t.long\t0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc\n\t.long\t0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da\n\t.long\t0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7\n\t.long\t0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967\n\t.long\t0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13\n\t.long\t0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85\n\t.long\t0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3\n\t.long\t0xd192e819,0xd6990624,0xf40e3585,0x106aa070\n\t.long\t0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5\n\t.long\t0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3\n\t.long\t0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208\n\t.long\t0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2\n\n\t.long\t0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f\n\t.long\t0x03020100,0x0b0a0908,0xffffffff,0xffffffff\n\t.long\t0xffffffff,0xffffffff,0x03020100,0x0b0a0908\n\t.asciz\t\"SHA256 block transform for x86_64, CRYPTOGAMS by \\@dot-asm\"\n.text\n___\n\n######################################################################\n# SIMD code paths\n#\n{{{\n######################################################################\n# Intel SHA Extensions implementation of SHA256 update function.\n#\nmy ($ctx,$inp,$num,$Tbl)=(\"%rdi\",\"%rsi\",\"%rdx\",\"%rcx\");\n\nmy ($Wi,$ABEF,$CDGH,$TMP,$BSWAP,$ABEF_SAVE,$CDGH_SAVE)=map(\"%xmm$_\",(0..2,7..10));\nmy 
@MSG=map(\"%xmm$_\",(3..6));\n\n$code.=<<___;\n.globl\t${pre}sha256_block_data_order_shaext\n.hidden\t${pre}sha256_block_data_order_shaext\n.type\t${pre}sha256_block_data_order_shaext,\\@function,3,\"unwind\"\n.align\t64\n${pre}sha256_block_data_order_shaext:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tmov\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n.L${func}\\$2:\n___\n$code.=<<___ if ($win64);\n\tsub\t\\$0x50,%rsp\n.cfi_alloca\t0x50\n\tmovaps\t%xmm6,-0x50(%rbp)\n\tmovaps\t%xmm7,-0x40(%rbp)\n\tmovaps\t%xmm8,-0x30(%rbp)\n\tmovaps\t%xmm9,-0x20(%rbp)\n\tmovaps\t%xmm10,-0x10(%rbp)\n.cfi_offset\t%xmm6-%xmm10,-0x60\n___\n$code.=<<___;\n.cfi_end_prologue\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tlea\t\tK256+0x80(%rip),$Tbl\n\tmovdqu\t\t($ctx),$ABEF\t\t# DCBA\n\tmovdqu\t\t16($ctx),$CDGH\t\t# HGFE\n\tmovdqa\t\t0x100-0x80($Tbl),$TMP\t# byte swap mask\n\n\tpshufd\t\t\\$0x1b,$ABEF,$Wi\t# ABCD\n\tpshufd\t\t\\$0xb1,$ABEF,$ABEF\t# CDAB\n\tpshufd\t\t\\$0x1b,$CDGH,$CDGH\t# EFGH\n\tmovdqa\t\t$TMP,$BSWAP\t\t# offload\n\tpalignr\t\t\\$8,$CDGH,$ABEF\t\t# ABEF\n\tpunpcklqdq\t$Wi,$CDGH\t\t# CDGH\n\tjmp\t\t.Loop_shaext\n\n.align\t16\n.Loop_shaext:\n\tmovdqu\t\t($inp),@MSG[0]\n\tmovdqu\t\t0x10($inp),@MSG[1]\n\tmovdqu\t\t0x20($inp),@MSG[2]\n\tpshufb\t\t$TMP,@MSG[0]\n\tmovdqu\t\t0x30($inp),@MSG[3]\n\n\tmovdqa\t\t0*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[0],$Wi\n\tpshufb\t\t$TMP,@MSG[1]\n\tmovdqa\t\t$CDGH,$CDGH_SAVE\t# offload\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 0-3\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tnop\n\tmovdqa\t\t$ABEF,$ABEF_SAVE\t# offload\n\tsha256rnds2\t$CDGH,$ABEF\n\n\tmovdqa\t\t1*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[1],$Wi\n\tpshufb\t\t$TMP,@MSG[2]\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 4-7\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tlea\t\t0x40($inp),$inp\n\tsha256msg1\t@MSG[1],@MSG[0]\n\tsha256rnds2\t$CDGH,$ABEF\n\n\tmovdqa\t\t2*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[2],$Wi\n\tpshufb\t\t$TMP,@MSG[3]\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 
8-11\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tmovdqa\t\t@MSG[3],$TMP\n\tpalignr\t\t\\$4,@MSG[2],$TMP\n\tnop\n\tpaddd\t\t$TMP,@MSG[0]\n\tsha256msg1\t@MSG[2],@MSG[1]\n\tsha256rnds2\t$CDGH,$ABEF\n\n\tmovdqa\t\t3*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[3],$Wi\n\tsha256msg2\t@MSG[3],@MSG[0]\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 12-15\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tmovdqa\t\t@MSG[0],$TMP\n\tpalignr\t\t\\$4,@MSG[3],$TMP\n\tnop\n\tpaddd\t\t$TMP,@MSG[1]\n\tsha256msg1\t@MSG[3],@MSG[2]\n\tsha256rnds2\t$CDGH,$ABEF\n___\nfor($i=4;$i<16-3;$i++) {\n$code.=<<___;\n\tmovdqa\t\t$i*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[0],$Wi\n\tsha256msg2\t@MSG[0],@MSG[1]\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 16-19...\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tmovdqa\t\t@MSG[1],$TMP\n\tpalignr\t\t\\$4,@MSG[0],$TMP\n\tnop\n\tpaddd\t\t$TMP,@MSG[2]\n\tsha256msg1\t@MSG[0],@MSG[3]\n\tsha256rnds2\t$CDGH,$ABEF\n___\n\tpush(@MSG,shift(@MSG));\n}\n$code.=<<___;\n\tmovdqa\t\t13*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[0],$Wi\n\tsha256msg2\t@MSG[0],@MSG[1]\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 52-55\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tmovdqa\t\t@MSG[1],$TMP\n\tpalignr\t\t\\$4,@MSG[0],$TMP\n\tsha256rnds2\t$CDGH,$ABEF\n\tpaddd\t\t$TMP,@MSG[2]\n\n\tmovdqa\t\t14*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[1],$Wi\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 56-59\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tsha256msg2\t@MSG[1],@MSG[2]\n\tmovdqa\t\t$BSWAP,$TMP\n\tsha256rnds2\t$CDGH,$ABEF\n\n\tmovdqa\t\t15*16-0x80($Tbl),$Wi\n\tpaddd\t\t@MSG[2],$Wi\n\tnop\n\tsha256rnds2\t$ABEF,$CDGH\t\t# 60-63\n\tpshufd\t\t\\$0x0e,$Wi,$Wi\n\tdec\t\t$num\n\tnop\n\tsha256rnds2\t$CDGH,$ABEF\n\n\tpaddd\t\t$CDGH_SAVE,$CDGH\n\tpaddd\t\t$ABEF_SAVE,$ABEF\n\tjnz\t\t.Loop_shaext\n\n\tpshufd\t\t\\$0xb1,$CDGH,$CDGH\t# DCHG\n\tpshufd\t\t\\$0x1b,$ABEF,$TMP\t# FEBA\n\tpshufd\t\t\\$0xb1,$ABEF,$ABEF\t# BAFE\n\tpunpckhqdq\t$CDGH,$ABEF\t\t# DCBA\n\tpalignr\t\t\\$8,$TMP,$CDGH\t\t# HGFE\n\n\tmovdqu\t$ABEF,($ctx)\n\tmovdqu\t$CDGH,16($ctx)\n___\n$code.=<<___ if 
($win64);\n\tmovaps\t-0x50(%rbp),%xmm6\n\tmovaps\t-0x40(%rbp),%xmm7\n\tmovaps\t-0x30(%rbp),%xmm8\n\tmovaps\t-0x20(%rbp),%xmm9\n\tmovaps\t-0x10(%rbp),%xmm10\n\tmov\t%rbp,%rsp\n___\n$code.=<<___;\n.cfi_def_cfa_register\t%rsp\n\tpop\t%rbp\n.cfi_pop\t%rbp\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\t${pre}sha256_block_data_order_shaext,.-${pre}sha256_block_data_order_shaext\n___\n}}}\n{{{\n\nmy $a4=$T1;\nmy ($a,$b,$c,$d,$e,$f,$g,$h);\n\nsub AUTOLOAD()\t\t# thunk [simplified] 32-bit style perlasm\n{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://;\n  my $arg = pop;\n    $arg = \"\\$$arg\" if ($arg*1 eq $arg);\n    $code .= \"\\t$opcode\\t\".join(',',$arg,reverse @_).\"\\n\";\n}\n\nsub body_00_15 () {\n\t(\n\t'($a,$b,$c,$d,$e,$f,$g,$h)=@ROT;'.\n\n\t'&ror\t($a0,$Sigma1[2]-$Sigma1[1])',\n\t'&mov\t($a,$a1)',\n\t'&mov\t($a4,$f)',\n\n\t'&ror\t($a1,$Sigma0[2]-$Sigma0[1])',\n\t'&xor\t($a0,$e)',\n\t'&xor\t($a4,$g)',\t\t\t# f^g\n\n\t'&ror\t($a0,$Sigma1[1]-$Sigma1[0])',\n\t'&xor\t($a1,$a)',\n\t'&and\t($a4,$e)',\t\t\t# (f^g)&e\n\n\t'&xor\t($a0,$e)',\n\t'&add\t($h,$SZ*($i&15).\"(%rsp)\")',\t# h+=X[i]+K[i]\n\t'&mov\t($a2,$a)',\n\n\t'&xor\t($a4,$g)',\t\t\t# Ch(e,f,g)=((f^g)&e)^g\n\t'&ror\t($a1,$Sigma0[1]-$Sigma0[0])',\n\t'&xor\t($a2,$b)',\t\t\t# a^b, b^c in next round\n\n\t'&add\t($h,$a4)',\t\t\t# h+=Ch(e,f,g)\n\t'&ror\t($a0,$Sigma1[0])',\t\t# Sigma1(e)\n\t'&and\t($a3,$a2)',\t\t\t# (b^c)&(a^b)\n\n\t'&xor\t($a1,$a)',\n\t'&add\t($h,$a0)',\t\t\t# h+=Sigma1(e)\n\t'&xor\t($a3,$b)',\t\t\t# Maj(a,b,c)=Ch(a^b,c,b)\n\n\t'&ror\t($a1,$Sigma0[0])',\t\t# Sigma0(a)\n\t'&add\t($d,$h)',\t\t\t# d+=h\n\t'&add\t($h,$a3)',\t\t\t# h+=Maj(a,b,c)\n\n\t'&mov\t($a0,$d)',\n\t'&add\t($a1,$h);'.\t\t\t# h+=Sigma0(a)\n\t'($a2,$a3) = ($a3,$a2); unshift(@ROT,pop(@ROT)); $i++;'\n\t);\n}\n\n######################################################################\n# SSSE3 code path\n#\n{\nmy $Tbl = $inp;\nmy $_ctx=\"-64(%rbp)\";\nmy $_inp=\"-56(%rbp)\";\nmy $_end=\"-48(%rbp)\";\nmy $framesz=3*8+$win64*16*4;\n\nmy @X = 
map(\"%xmm$_\",(0..3));\nmy ($t0,$t1,$t2,$t3, $t4,$t5) = map(\"%xmm$_\",(4..9));\n\n$code.=<<___;\n.globl\t${func}\n.hidden\t${func}\n.type\t${func},\\@function,3,\"unwind\"\n.align\t64\n${func}:\n.cfi_startproc\n\tpush\t%rbp\n.cfi_push\t%rbp\n\tmov\t%rsp,%rbp\n.cfi_def_cfa_register\t%rbp\n#ifndef\t__SGX_LVI_HARDENING__\n\ttestl\t\\$2,__blst_platform_cap(%rip)\n\tjnz\t.L${func}\\$2\n#endif\n\tpush\t%rbx\n.cfi_push\t%rbx\n\tpush\t%r12\n.cfi_push\t%r12\n\tpush\t%r13\n.cfi_push\t%r13\n\tpush\t%r14\n.cfi_push\t%r14\n\tpush\t%r15\n.cfi_push\t%r15\n\tshl\t\\$4,%rdx\t\t# num*16\n\tsub\t\\$$framesz,%rsp\n.cfi_alloca\t$framesz\n\tlea\t($inp,%rdx,$SZ),%rdx\t# inp+num*16*$SZ\n\tmov\t$ctx,$_ctx\t\t# save ctx, 1st arg\n\t#mov\t$inp,$_inp\t\t# save inp, 2nd arg\n\tmov\t%rdx,$_end\t\t# save end pointer, \"3rd\" arg\n___\n$code.=<<___ if ($win64);\n\tmovaps\t%xmm6,-0x80(%rbp)\n\tmovaps\t%xmm7,-0x70(%rbp)\n\tmovaps\t%xmm8,-0x60(%rbp)\n\tmovaps\t%xmm9,-0x50(%rbp)\n.cfi_offset\t%xmm6-%xmm9,-0x90\n___\n$code.=<<___;\n.cfi_end_prologue\n\n\tlea\t-16*$SZ(%rsp),%rsp\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t$SZ*0($ctx),$A\n\tand\t\\$-64,%rsp\t\t# align stack\n\tmov\t$SZ*1($ctx),$B\n\tmov\t$SZ*2($ctx),$C\n\tmov\t$SZ*3($ctx),$D\n\tmov\t$SZ*4($ctx),$E\n\tmov\t$SZ*5($ctx),$F\n\tmov\t$SZ*6($ctx),$G\n\tmov\t$SZ*7($ctx),$H\n___\n\n$code.=<<___;\n\t#movdqa\t$TABLE+`$SZ*$rounds`+32(%rip),$t4\n\t#movdqa\t$TABLE+`$SZ*$rounds`+64(%rip),$t5\n\tjmp\t.Lloop_ssse3\n.align\t16\n.Lloop_ssse3:\n\tmovdqa\t$TABLE+`$SZ*$rounds`(%rip),$t3\n\tmov\t$inp,$_inp\t\t# offload 
$inp\n\tmovdqu\t0x00($inp),@X[0]\n\tmovdqu\t0x10($inp),@X[1]\n\tmovdqu\t0x20($inp),@X[2]\n\tpshufb\t$t3,@X[0]\n\tmovdqu\t0x30($inp),@X[3]\n\tlea\t$TABLE(%rip),$Tbl\n\tpshufb\t$t3,@X[1]\n\tmovdqa\t0x00($Tbl),$t0\n\tmovdqa\t0x10($Tbl),$t1\n\tpshufb\t$t3,@X[2]\n\tpaddd\t@X[0],$t0\n\tmovdqa\t0x20($Tbl),$t2\n\tpshufb\t$t3,@X[3]\n\tmovdqa\t0x30($Tbl),$t3\n\tpaddd\t@X[1],$t1\n\tpaddd\t@X[2],$t2\n\tpaddd\t@X[3],$t3\n\tmovdqa\t$t0,0x00(%rsp)\n\tmov\t$A,$a1\n\tmovdqa\t$t1,0x10(%rsp)\n\tmov\t$B,$a3\n\tmovdqa\t$t2,0x20(%rsp)\n\txor\t$C,$a3\t\t\t# magic\n\tmovdqa\t$t3,0x30(%rsp)\n\tmov\t$E,$a0\n\tjmp\t.Lssse3_00_47\n\n.align\t16\n.Lssse3_00_47:\n\tsub\t\\$`-16*$SZ`,$Tbl\t# size optimization\n___\nsub Xupdate_256_SSSE3 () {\n\t(\n\t'&movdqa\t($t0,@X[1]);',\n\t'&movdqa\t($t3,@X[3])',\n\t'&palignr\t($t0,@X[0],$SZ)',\t# X[1..4]\n\t '&palignr\t($t3,@X[2],$SZ);',\t# X[9..12]\n\t'&movdqa\t($t1,$t0)',\n\t'&movdqa\t($t2,$t0);',\n\t'&psrld\t\t($t0,$sigma0[2])',\n\t '&paddd\t(@X[0],$t3);',\t\t# X[0..3] += X[9..12]\n\t'&psrld\t\t($t2,$sigma0[0])',\n\t '&pshufd\t($t3,@X[3],0b11111010)',# X[14..15]\n\t'&pslld\t\t($t1,8*$SZ-$sigma0[1]);'.\n\t'&pxor\t\t($t0,$t2)',\n\t'&psrld\t\t($t2,$sigma0[1]-$sigma0[0]);'.\n\t'&pxor\t\t($t0,$t1)',\n\t'&pslld\t\t($t1,$sigma0[1]-$sigma0[0]);'.\n\t'&pxor\t\t($t0,$t2);',\n\t '&movdqa\t($t2,$t3)',\n\t'&pxor\t\t($t0,$t1);',\t\t# sigma0(X[1..4])\n\t '&psrld\t($t3,$sigma1[2])',\n\t'&paddd\t\t(@X[0],$t0);',\t\t# X[0..3] += sigma0(X[1..4])\n\t '&psrlq\t($t2,$sigma1[0])',\n\t '&pxor\t\t($t3,$t2);',\n\t '&psrlq\t($t2,$sigma1[1]-$sigma1[0])',\n\t '&pxor\t\t($t3,$t2)',\n\t '&pshufb\t($t3,$t4)',\t\t# sigma1(X[14..15])\n\t'&paddd\t\t(@X[0],$t3)',\t\t# X[0..1] += sigma1(X[14..15])\n\t '&pshufd\t($t3,@X[0],0b01010000)',# X[16..17]\n\t '&movdqa\t($t2,$t3);',\n\t '&psrld\t($t3,$sigma1[2])',\n\t '&psrlq\t($t2,$sigma1[0])',\n\t '&pxor\t\t($t3,$t2);',\n\t '&psrlq\t($t2,$sigma1[1]-$sigma1[0])',\n\t '&pxor\t\t($t3,$t2);',\n\t'&movdqa\t($t2,16*$j.\"($Tbl)\")',\n\t 
'&pshufb\t($t3,$t5)',\n\t'&paddd\t\t(@X[0],$t3)'\t\t# X[2..3] += sigma1(X[16..17])\n\t);\n}\n\nsub SSSE3_256_00_47 () {\nmy $j = shift;\nmy $body = shift;\nmy @X = @_;\nmy @insns = (&$body,&$body,&$body,&$body);\t# 104 instructions\n\n    if (0) {\n\tforeach (Xupdate_256_SSSE3()) {\t\t# 36 instructions\n\t    eval;\n\t    eval(shift(@insns));\n\t    eval(shift(@insns));\n\t    eval(shift(@insns));\n\t}\n    } else {\t\t\t# squeeze extra 4% on Westmere and 19% on Atom\n\t  eval(shift(@insns));\t#@\n\t&movdqa\t\t($t0,@X[1]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&movdqa\t\t($t3,@X[3]);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t&palignr\t($t0,@X[0],$SZ);\t# X[1..4]\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &palignr\t($t3,@X[2],$SZ);\t# X[9..12]\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t&movdqa\t\t($t1,$t0);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&movdqa\t\t($t2,$t0);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t&psrld\t\t($t0,$sigma0[2]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &paddd\t\t(@X[0],$t3);\t\t# X[0..3] += X[9..12]\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t&psrld\t\t($t2,$sigma0[0]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pshufd\t($t3,@X[3],0b11111010);\t# X[14..15]\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t&pslld\t\t($t1,8*$SZ-$sigma0[1]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&pxor\t\t($t0,$t2);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t&psrld\t\t($t2,$sigma0[1]-$sigma0[0]);\n\t  eval(shift(@insns));\n\t&pxor\t\t($t0,$t1);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&pslld\t\t($t1,$sigma0[1]-$sigma0[0]);\n\t  eval(shift(@insns));\n\t  
eval(shift(@insns));\n\t&pxor\t\t($t0,$t2);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t &movdqa\t($t2,$t3);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&pxor\t\t($t0,$t1);\t\t# sigma0(X[1..4])\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &psrld\t\t($t3,$sigma1[2]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&paddd\t\t(@X[0],$t0);\t\t# X[0..3] += sigma0(X[1..4])\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t &psrlq\t\t($t2,$sigma1[0]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pxor\t\t($t3,$t2);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t &psrlq\t\t($t2,$sigma1[1]-$sigma1[0]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pxor\t\t($t3,$t2);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t #&pshufb\t($t3,$t4);\t\t# sigma1(X[14..15])\n\t &pshufd\t($t3,$t3,0b10000000);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &psrldq\t($t3,8);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t&paddd\t\t(@X[0],$t3);\t\t# X[0..1] += sigma1(X[14..15])\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pshufd\t($t3,@X[0],0b01010000);\t# X[16..17]\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t &movdqa\t($t2,$t3);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &psrld\t\t($t3,$sigma1[2]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t &psrlq\t\t($t2,$sigma1[0]);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pxor\t\t($t3,$t2);\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t &psrlq\t\t($t2,$sigma1[1]-$sigma1[0]);\n\t  
eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t &pxor\t\t($t3,$t2);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\t#@\n\t #&pshufb\t($t3,$t5);\n\t &pshufd\t($t3,$t3,0b00001000);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&movdqa\t\t($t2,16*$j.\"($Tbl)\");\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t &pslldq\t($t3,8);\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n\t&paddd\t\t(@X[0],$t3);\t\t# X[2..3] += sigma1(X[16..17])\n\t  eval(shift(@insns));\t#@\n\t  eval(shift(@insns));\n\t  eval(shift(@insns));\n    }\n\t&paddd\t\t($t2,@X[0]);\n\t  foreach (@insns) { eval; }\t\t# remaining instructions\n\t&movdqa\t\t(16*$j.\"(%rsp)\",$t2);\n}\n\n    for ($i=0,$j=0; $j<4; $j++) {\n\t&SSSE3_256_00_47($j,\\&body_00_15,@X);\n\tpush(@X,shift(@X));\t\t\t# rotate(@X)\n    }\n\t&cmpb\t($SZ-1+16*$SZ.\"($Tbl)\",0);\n\t&jne\t(\".Lssse3_00_47\");\n\n    for ($i=0; $i<16; ) {\n\tforeach(body_00_15()) { eval; }\n    }\n$code.=<<___;\n\tmov\t$_ctx,$ctx\n\tmov\t$a1,$A\n\tmov\t$_inp,$inp\n\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tadd\t$SZ*0($ctx),$A\n\tadd\t$SZ*1($ctx),$B\n\tadd\t$SZ*2($ctx),$C\n\tadd\t$SZ*3($ctx),$D\n\tadd\t$SZ*4($ctx),$E\n\tadd\t$SZ*5($ctx),$F\n\tadd\t$SZ*6($ctx),$G\n\tadd\t$SZ*7($ctx),$H\n\n\tlea\t16*$SZ($inp),$inp\n\tcmp\t$_end,$inp\n\n\tmov\t$A,$SZ*0($ctx)\n\tmov\t$B,$SZ*1($ctx)\n\tmov\t$C,$SZ*2($ctx)\n\tmov\t$D,$SZ*3($ctx)\n\tmov\t$E,$SZ*4($ctx)\n\tmov\t$F,$SZ*5($ctx)\n\tmov\t$G,$SZ*6($ctx)\n\tmov\t$H,$SZ*7($ctx)\n\tjb\t.Lloop_ssse3\n\n\txorps\t%xmm0, %xmm0\n\tmovaps\t%xmm0, 0x00(%rsp)\t# scrub the stack\n\tmovaps\t%xmm0, 0x10(%rsp)\n\tmovaps\t%xmm0, 0x20(%rsp)\n\tmovaps\t%xmm0, 0x30(%rsp)\n___\n$code.=<<___ if 
($win64);\n\tmovaps\t-0x80(%rbp),%xmm6\n\tmovaps\t-0x70(%rbp),%xmm7\n\tmovaps\t-0x60(%rbp),%xmm8\n\tmovaps\t-0x50(%rbp),%xmm9\n___\n$code.=<<___;\n\tmov\t-40(%rbp),%r15\n\tmov\t-32(%rbp),%r14\n\tmov\t-24(%rbp),%r13\n\tmov\t-16(%rbp),%r12\n\tmov\t-8(%rbp),%rbx\n\tmov\t%rbp,%rsp\n.cfi_def_cfa_register\t%rsp\n\tpop\t%rbp\n.cfi_pop\t%rbp\n.cfi_epilogue\n\tret\n.cfi_endproc\n.size\t${func},.-${func}\n___\n}\n}}}\n{\nmy ($out,$inp,$len) = $win64 ? (\"%rcx\",\"%rdx\",\"%r8\") :  # Win64 order\n                               (\"%rdi\",\"%rsi\",\"%rdx\");  # Unix order\n$code.=<<___;\n.globl\t${pre}sha256_emit\n.hidden\t${pre}sha256_emit\n.type\t${pre}sha256_emit,\\@abi-omnipotent\n.align\t16\n${pre}sha256_emit:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t0($inp), %r8\n\tmov\t8($inp), %r9\n\tmov\t16($inp), %r10\n\tbswap\t%r8\n\tmov\t24($inp), %r11\n\tbswap\t%r9\n\tmov\t%r8d, 4($out)\n\tbswap\t%r10\n\tmov\t%r9d, 12($out)\n\tbswap\t%r11\n\tmov\t%r10d, 20($out)\n\tshr\t\\$32, %r8\n\tmov\t%r11d, 28($out)\n\tshr\t\\$32, %r9\n\tmov\t%r8d, 0($out)\n\tshr\t\\$32, %r10\n\tmov\t%r9d, 8($out)\n\tshr\t\\$32, %r11\n\tmov\t%r10d, 16($out)\n\tmov\t%r11d, 24($out)\n\tret\n.size\t${pre}sha256_emit,.-${pre}sha256_emit\n\n.globl\t${pre}sha256_bcopy\n.hidden\t${pre}sha256_bcopy\n.type\t${pre}sha256_bcopy,\\@abi-omnipotent\n.align\t16\n${pre}sha256_bcopy:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tsub\t$inp, $out\n.Loop_bcopy:\n\tmovzb\t($inp), %eax\n\tlea\t1($inp), $inp\n\tmov\t%al, -1($out,$inp)\n\tdec\t$len\n\tjnz\t.Loop_bcopy\n\tret\n.size\t${pre}sha256_bcopy,.-${pre}sha256_bcopy\n\n.globl\t${pre}sha256_hcopy\n.hidden\t${pre}sha256_hcopy\n.type\t${pre}sha256_hcopy,\\@abi-omnipotent\n.align\t16\n${pre}sha256_hcopy:\n#ifdef\t__SGX_LVI_HARDENING__\n\tlfence\n#endif\n\tmov\t0($inp), %r8\n\tmov\t8($inp), %r9\n\tmov\t16($inp), %r10\n\tmov\t24($inp), %r11\n\tmov\t%r8, 0($out)\n\tmov\t%r9, 8($out)\n\tmov\t%r10, 16($out)\n\tmov\t%r11, 
24($out)\n\tret\n.size\t${pre}sha256_hcopy,.-${pre}sha256_hcopy\n___\n}\n\nsub sha256op38 {\n    my $instr = shift;\n    my %opcodelet = (\n\t\t\"sha256rnds2\" => 0xcb,\n  \t\t\"sha256msg1\"  => 0xcc,\n\t\t\"sha256msg2\"  => 0xcd\t);\n\n    if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-7]),\\s*%xmm([0-7])/) {\n      my @opcode=(0x0f,0x38);\n\tpush @opcode,$opcodelet{$instr};\n\tpush @opcode,0xc0|($1&7)|(($2&7)<<3);\t\t# ModR/M\n\treturn \".byte\\t\".join(',',@opcode);\n    } else {\n\treturn $instr.\"\\t\".@_[0];\n    }\n}\n\nforeach (split(\"\\n\",$code)) {\n\ts/\\`([^\\`]*)\\`/eval $1/geo;\n\n\ts/\\b(sha256[^\\s]*)\\s+(.*)/sha256op38($1,$2)/geo;\n\n\tprint $_,\"\\n\";\n}\nclose STDOUT;\n"
  },
  {
    "path": "src/asm/x86_64-xlate.pl",
    "content": "#!/usr/bin/env perl\n#\n# Copyright Supranational LLC\n# Licensed under the Apache License, Version 2.0, see LICENSE for details.\n# SPDX-License-Identifier: Apache-2.0\n#\n# Ascetic x86_64 AT&T to MASM/NASM assembler translator by @dot-asm.\n#\n# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T\n# format is way easier to parse. Because it's simpler to \"gear\" from\n# Unix ABI to Windows one [see cross-reference \"card\" at the end of\n# file]. Because Linux targets were available first...\n#\n# In addition the script also \"distills\" code suitable for GNU\n# assembler, so that it can be compiled with more rigid assemblers,\n# such as Solaris /usr/ccs/bin/as.\n#\n# This translator is not designed to convert *arbitrary* assembler\n# code from AT&T format to MASM one. It's designed to convert just\n# enough to provide for dual-ABI OpenSSL modules development...\n# There *are* limitations and you might have to modify your assembler\n# code or this script to achieve the desired result...\n#\n# Currently recognized limitations:\n#\n# - can't use multiple ops per line;\n#\n# Dual-ABI styling rules.\n#\n# 1. Adhere to Unix register and stack layout [see cross-reference\n#    ABI \"card\" at the end for explanation].\n# 2. Forget about \"red zone,\" stick to more traditional blended\n#    stack frame allocation. If volatile storage is actually required\n#    that is. If not, just leave the stack as is.\n# 3. Functions tagged with \".type name,@function\" get crafted with\n#    unified Win64 prologue and epilogue automatically. If you want\n#    to take care of ABI differences yourself, tag functions as\n#    \".type name,@abi-omnipotent\" instead.\n# 4. To optimize the Win64 prologue you can specify number of input\n#    arguments as \".type name,@function,N.\" Keep in mind that if N is\n#    larger than 6, then you *have to* write \"abi-omnipotent\" code,\n#    because >6 cases can't be addressed with unified prologue.\n# 5. 
Name local labels as .L*, do *not* use dynamic labels such as 1:\n#    (sorry about latter).\n# 6. Don't use [or hand-code with .byte] \"rep ret.\" \"ret\" mnemonic is\n#    required to identify the spots, where to inject Win64 epilogue!\n#    But on the pros, it's then prefixed with rep automatically:-)\n# 7. Stick to explicit ip-relative addressing. If you have to use\n#    GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.\n#    Both are recognized and translated to proper Win64 addressing\n#    modes.\n#\n# 8. In order to provide for structured exception handling unified\n#    Win64 prologue copies %rsp value to %rax. [Unless function is\n#    tagged with additional .type tag.] For further details see SEH\n#    paragraph at the end.\n# 9. .init segment is allowed to contain calls to functions only.\n# a. If function accepts more than 4 arguments *and* >4th argument\n#    is declared as non 64-bit value, do clear its upper part.\n\f\n\nuse strict;\n\nmy $flavour = shift;\nmy $output  = shift;\nif ($flavour =~ /\\./) { $output = $flavour; undef $flavour; }\n\nopen STDOUT,\">$output\" || die \"can't open $output: $!\"\n\tif (defined($output));\n\nmy $gas=1;\t$gas=0 if ($output =~ /\\.asm$/);\nmy $elf=1;\t$elf=0 if (!$gas);\nmy $dwarf=$elf;\nmy $win64=0;\nmy $prefix=\"\";\nmy $decor=\".L\";\n\nmy $masmref=8 + 50727*2**-32;\t# 8.00.50727 shipped with VS2005\nmy $masm=0;\nmy $PTR=\" PTR\";\n\nmy $nasmref=2.03;\nmy $nasm=0;\n\nif    ($flavour eq \"mingw64\")\t{ $gas=1; $elf=0; $win64=1;\n\t\t\t\t  $prefix=`echo __USER_LABEL_PREFIX__ | \\${CC:-false} -E -P -`;\n\t\t\t\t  $prefix =~ s|\\R$||; # Better chomp\n\t\t\t\t}\nelsif ($flavour eq \"macosx\")\t{ $gas=1; $elf=0; $prefix=\"_\"; $decor=\"L\\$\"; }\nelsif ($flavour eq \"masm\")\t{ $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor=\"\\$L\\$\"; }\nelsif ($flavour eq \"nasm\")\t{ $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor=\"\\$L\\$\"; $PTR=\"\"; }\nelsif (!$gas)\n{   if ($ENV{ASM} =~ m/nasm/ && `nasm -v` 
=~ m/version ([0-9]+)\\.([0-9]+)/i)\n    {\t$nasm = $1 + $2*0.01; $PTR=\"\";  }\n    elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\\.([0-9]+)(\\.([0-9]+))?/)\n    {\t$masm = $1 + $2*2**-16 + $4*2**-32;   }\n    die \"no assembler found on %PATH%\" if (!($nasm || $masm));\n    $win64=1;\n    $elf=0;\n    $decor=\"\\$L\\$\";\n}\nmy $colon= $masm ? \"::\" : \":\";\n\n$dwarf=0 if($win64);\n\nmy $current_segment;\nmy $current_function;\nmy %globals;\nmy $ret_clobber;\n\n{ package opcode;\t# pick up opcodes\n    sub re {\n\tmy\t($class, $line) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\tif ($$line =~ /^([a-z][a-z0-9]*)/i) {\n\t    bless $self,$class;\n\t    $self->{op} = $1;\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\n\t    undef $self->{sz};\n\t    if ($self->{op} =~ /^(movz)x?([bw]).*/) {\t# movz is pain...\n\t\t$self->{op} = $1;\n\t\t$self->{sz} = $2;\n\t    } elsif ($self->{op} =~ /cmov[n]?[lb]$/) {\n\t\t# pass through\n\t    } elsif ($self->{op} =~ /call|jmp/) {\n\t\t$self->{sz} = \"\";\n\t    } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn\n\t\t$self->{sz} = \"\";\n\t    } elsif ($self->{op} =~ /^[vk]/) { # VEX or k* such as kmov\n\t\t$self->{sz} = \"\";\n\t    } elsif ($self->{op} =~ /mov[dq]/ && $$line =~ /%xmm/) {\n\t\t$self->{sz} = \"\";\n\t    } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {\n\t\t$self->{op} = $1;\n\t\t$self->{sz} = $2;\n\t    }\n\t}\n\t$ret;\n    }\n    sub size {\n\tmy ($self, $sz) = @_;\n\t$self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));\n\t$self->{sz};\n    }\n    sub out {\n\tmy $self = shift;\n\tif ($gas) {\n\t    if ($self->{op} eq \"movz\") {\t# movz is pain...\n\t\tsprintf \"%s%s%s\",$self->{op},$self->{sz},shift;\n\t    } elsif ($self->{op} =~ /^set/) {\n\t\t\"$self->{op}\";\n\t    } elsif ($self->{op} eq \"ret\") {\n\t\tmy $epilogue = \"\";\n\t\tmy $reg = $ret_clobber || \"rdx\";\n\t\t$ret_clobber = undef;\n\t\tif ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t\t 
  && !$current_function->{unwind}) {\n\t\t    $epilogue = \"movq\t8(%rsp),%rdi\\n\\t\" .\n\t\t\t\t\"movq\t16(%rsp),%rsi\\n\\t\";\n\t\t}\n\t\t$epilogue . \"\\n#ifdef\t__SGX_LVI_HARDENING__\\n\".\n\t\t\t\t\"\tpopq\t%$reg\\n\"\t\t.\n\t\t\t\t\"\tlfence\\n\"\t\t.\n\t\t\t\t\"\tjmpq\t*%$reg\\n\"\t.\n\t\t\t\t\"\tud2\\n\"\t\t\t.\n\t\t\t\t\"#else\\n\"\t\t\t.\n\t\t\t\t\"\t.byte\t0xf3,0xc3\\n\"\t.\n\t\t\t\t\"#endif\";\n\t    } elsif ($self->{op} eq \"call\" && !$elf && $current_segment eq \".init\") {\n\t\t\".p2align\\t3\\n\\t.quad\";\n\t    } else {\n\t\t\"$self->{op}$self->{sz}\";\n\t    }\n\t} else {\n\t    $self->{op} =~ s/^movz/movzx/;\n\t    if ($self->{op} eq \"ret\") {\n\t\t$self->{op} = \"\";\n\t\tmy $reg = $ret_clobber || \"rdx\";\n\t\t$ret_clobber = undef;\n\t\tif ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t\t   && !$current_function->{unwind}) {\n\t\t    $self->{op} = \"mov\trdi,QWORD$PTR\\[8+rsp\\]\\t;WIN64 epilogue\\n\\t\".\n\t\t\t\t  \"mov\trsi,QWORD$PTR\\[16+rsp\\]\\n\\t\";\n\t\t}\n\t\t$self->{op} .= \"\\nifdef\t__SGX_LVI_HARDENING__\\n\".\n\t\t\t\t\"\tpop\t$reg\\n\"\t\t.\n\t\t\t\t\"\tlfence\\n\"\t\t.\n\t\t\t\t\"\tjmp\t$reg\\n\"\t\t.\n\t\t\t\t\"\tud2\\n\"\t\t\t.\n\t\t\t\t\"else\\n\"\t\t\t.\n\t\t\t\t\"\tDB\\t0F3h,0C3h\\n\"\t.\n\t\t\t\t\"endif\";\n\t    } elsif ($self->{op} =~ /^(pop|push)f/) {\n\t\t$self->{op} .= $self->{sz};\n\t    } elsif ($self->{op} eq \"call\" && $current_segment eq \".CRT\\$XCU\") {\n\t\t$self->{op} = \"\\tDQ\";\n\t    }\n\t    $self->{op};\n\t}\n    }\n    sub mnemonic {\n\tmy ($self, $op) = @_;\n\t$self->{op}=$op if (defined($op));\n\t$self->{op};\n    }\n}\n{ package const;\t# pick up constants, which start with $\n    sub re {\n\tmy\t($class, $line) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\tif ($$line =~ /^\\$([^,]+)/) {\n\t    bless $self, $class;\n\t    $self->{value} = $1;\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\t}\n\t$ret;\n    }\n    sub out {\n\tmy $self = 
shift;\n\n\t$self->{value} =~ s/\\b(0b[0-1]+)/oct($1)/eig;\n\tif ($gas) {\n\t    # Solaris /usr/ccs/bin/as can't handle multiplications\n\t    # in $self->{value}\n\t    my $value = $self->{value};\n\t    no warnings;    # oct might complain about overflow, ignore here...\n\t    $value =~ s/(?<![\\w\\$\\.])(0x?[0-9a-f]+)/oct($1)/egi;\n\t    if ($value =~ s/([0-9]+\\s*[\\*\\/\\%]\\s*[0-9]+)/eval($1)/eg) {\n\t\t$self->{value} = $value;\n\t    }\n\t    sprintf \"\\$%s\",$self->{value};\n\t} else {\n\t    my $value = $self->{value};\n\t    $value =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm);\n\t    sprintf \"%s\",$value;\n\t}\n    }\n}\n{ package ea;\t\t# pick up effective addresses: expr(%reg,%reg,scale)\n\n    my %szmap = (\tb=>\"BYTE$PTR\",    w=>\"WORD$PTR\",\n\t\t\tl=>\"DWORD$PTR\",   d=>\"DWORD$PTR\",\n\t\t\tq=>\"QWORD$PTR\",   o=>\"OWORD$PTR\",\n\t\t\tx=>\"XMMWORD$PTR\", y=>\"YMMWORD$PTR\",\n\t\t\tz=>\"ZMMWORD$PTR\" ) if (!$gas);\n\n    my %sifmap = (\tss=>\"d\",\tsd=>\"q\",\t# broadcast only\n\t\t\ti32x2=>\"q\",\tf32x2=>\"q\",\n\t\t\ti32x4=>\"x\",\ti64x2=>\"x\",\ti128=>\"x\",\n\t\t\tf32x4=>\"x\",\tf64x2=>\"x\",\tf128=>\"x\",\n\t\t\ti32x8=>\"y\",\ti64x4=>\"y\",\n\t\t\tf32x8=>\"y\",\tf64x4=>\"y\" ) if (!$gas);\n\n    sub re {\n\tmy\t($class, $line, $opcode) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\t# optional * ----vvv--- appears in indirect jmp/call\n\tif ($$line =~ /^(\\*?)([^\\(,]*)\\(([%\\w,\\s]+)\\)((?:{[^}]+})*)/) {\n\t    bless $self, $class;\n\t    $self->{asterisk} = $1;\n\t    $self->{label} = $2;\n\t    ($self->{base},$self->{index},$self->{scale})=split(/(?:,\\s*)/,$3);\n\t    $self->{scale} = 1 if (!defined($self->{scale}));\n\t    $self->{opmask} = $4;\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\n\t    if ($win64 && $self->{label} =~ s/\\@GOTPCREL//) {\n\t\tdie if ($opcode->mnemonic() ne \"mov\");\n\t\t$opcode->mnemonic(\"lea\");\n\t    }\n\t    $self->{base}  =~ s/^%//;\n\t    $self->{index} =~ s/^%// if 
(defined($self->{index}));\n\t    $self->{opcode} = $opcode;\n\t}\n\t$ret;\n    }\n    sub size {}\n    sub out {\n\tmy ($self, $sz) = @_;\n\n\t$self->{label} =~ s/([_a-z][_a-z0-9\\$]*)/$globals{$1} or $1/gei;\n\t$self->{label} =~ s/\\.L/$decor/g;\n\n\t# Silently convert all EAs to 64-bit. This is required for\n\t# elder GNU assembler and results in more compact code,\n\t# *but* most importantly AES module depends on this feature!\n\t$self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\\1/;\n\t$self->{base}  =~ s/^[er](.?[0-9xpi])[d]?$/r\\1/;\n\n\t# Solaris /usr/ccs/bin/as can't handle multiplications\n\t# in $self->{label}...\n\tuse integer;\n\t$self->{label} =~ s/(?<![\\w\\$\\.])(0x?[0-9a-f]+)/oct($1)/egi;\n\t$self->{label} =~ s/\\b([0-9]+\\s*[\\*\\/\\%]\\s*[0-9]+)\\b/eval($1)/eg;\n\n\t# Some assemblers insist on signed presentation of 32-bit\n\t# offsets, but sign extension is a tricky business in perl...\n\t$self->{label} =~ s/\\b([0-9]+)\\b/unpack(\"l\",pack(\"L\",$1))/eg;\n\n\t# if base register is %rbp or %r13, see if it's possible to\n\t# flip base and index registers [for better performance]\n\tif (!$self->{label} && $self->{index} && $self->{scale}==1 &&\n\t    $self->{base} =~ /(rbp|r13)/) {\n\t\t$self->{base} = $self->{index}; $self->{index} = $1;\n\t}\n\n\tif ($gas) {\n\t    $self->{label} =~ s/^___imp_/__imp__/   if ($flavour eq \"mingw64\");\n\n\t    if (defined($self->{index})) {\n\t\tsprintf \"%s%s(%s,%%%s,%d)%s\",\n\t\t\t\t\t$self->{asterisk},$self->{label},\n\t\t\t\t\t$self->{base}?\"%$self->{base}\":\"\",\n\t\t\t\t\t$self->{index},$self->{scale},\n\t\t\t\t\t$self->{opmask};\n\t    } else {\n\t\tsprintf \"%s%s(%%%s)%s\",\t$self->{asterisk},$self->{label},\n\t\t\t\t\t$self->{base},$self->{opmask};\n\t    }\n\t} else {\n\t    $self->{label} =~ s/\\./\\$/g;\n\t    $self->{label} =~ s/(?<![\\w\\$\\.])0x([0-9a-f]+)/0$1h/ig;\n\t    $self->{label} = \"($self->{label})\" if ($self->{label} =~ /[\\*\\+\\-\\/]/);\n\n\t    my $mnemonic = 
$self->{opcode}->mnemonic();\n\t    ($self->{asterisk})\t\t\t\t&& ($sz=\"q\") ||\n\t    ($mnemonic =~ /^v?mov([qd])$/)\t\t&& ($sz=$1)  ||\n\t    ($mnemonic =~ /^v?pinsr([qdwb])$/)\t\t&& ($sz=$1)  ||\n\t    ($mnemonic =~ /^vpbroadcast([qdwb])$/)\t&& ($sz=$1)  ||\n\t    ($mnemonic =~ /^v(?:broadcast|extract|insert)([sif]\\w+)$/)\n\t\t\t\t\t\t\t&& ($sz=$sifmap{$1});\n\n\t    $self->{opmask}  =~ s/%(k[0-7])/$1/;\n\n\t    if (defined($self->{index})) {\n\t\tsprintf \"%s[%s%s*%d%s]%s\",$szmap{$sz},\n\t\t\t\t\t$self->{label}?\"$self->{label}+\":\"\",\n\t\t\t\t\t$self->{index},$self->{scale},\n\t\t\t\t\t$self->{base}?\"+$self->{base}\":\"\",\n\t\t\t\t\t$self->{opmask};\n\t    } elsif ($self->{base} eq \"rip\") {\n\t\tsprintf \"%s[%s]\",$szmap{$sz},$self->{label};\n\t    } else {\n\t\tsprintf \"%s[%s%s]%s\",\t$szmap{$sz},\n\t\t\t\t\t$self->{label}?\"$self->{label}+\":\"\",\n\t\t\t\t\t$self->{base},$self->{opmask};\n\t    }\n\t}\n    }\n}\n{ package register;\t# pick up registers, which start with %.\n    sub re {\n\tmy\t($class, $line, $opcode) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\t# optional * ----vvv--- appears in indirect jmp/call\n\tif ($$line =~ /^(\\*?)%(\\w+)((?:{[^}]+})*)/) {\n\t    bless $self,$class;\n\t    $self->{asterisk} = $1;\n\t    $self->{value} = $2;\n\t    $self->{opmask} = $3;\n\t    $opcode->size($self->size());\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\t}\n\t$ret;\n    }\n    sub size {\n\tmy\t$self = shift;\n\tmy\t$ret;\n\n\tif    ($self->{value} =~ /^r[\\d]+b$/i)\t{ $ret=\"b\"; }\n\telsif ($self->{value} =~ /^r[\\d]+w$/i)\t{ $ret=\"w\"; }\n\telsif ($self->{value} =~ /^r[\\d]+d$/i)\t{ $ret=\"l\"; }\n\telsif ($self->{value} =~ /^r[\\w]+$/i)\t{ $ret=\"q\"; }\n\telsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret=\"b\"; }\n\telsif ($self->{value} =~ /^[\\w]{2}l$/i)\t{ $ret=\"b\"; }\n\telsif ($self->{value} =~ /^[\\w]{2}$/i)\t{ $ret=\"w\"; }\n\telsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret=\"l\"; }\n\n\t$ret;\n    
}\n    sub out {\n\tmy $self = shift;\n\tif ($gas)\t{ sprintf \"%s%%%s%s\",\t$self->{asterisk},\n\t\t\t\t\t\t$self->{value},\n\t\t\t\t\t\t$self->{opmask}; }\n\telse\t\t{ $self->{opmask} =~ s/%(k[0-7])/$1/;\n\t\t\t  $self->{value}.$self->{opmask}; }\n    }\n}\n{ package label;\t# pick up labels, which end with :\n    sub re {\n\tmy\t($class, $line) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\tif ($$line =~ /(^[\\.\\w\\$]+)\\:/) {\n\t    bless $self,$class;\n\t    $self->{value} = $1;\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\n\t    $self->{value} =~ s/^(\\w+\\$\\w*)/$decor\\1/ if ($flavour eq \"macosx\");\n\t    $self->{value} =~ s/^\\.L/$decor/;\n\t}\n\t$ret;\n    }\n    sub win64_args {\n\tmy $narg = $current_function->{narg} // 6;\n\treturn undef if ($narg < 0);\n\tmy $arg5 = 4*8 - cfi_directive::cfa_rsp();\n\tmy $arg6 = $arg5 + 8;\n\tmy $args;\n\tif ($gas) {\n\t    $args .= \"\tmovq\t%rcx,%rdi\\n\" if ($narg>0);\n\t    $args .= \"\tmovq\t%rdx,%rsi\\n\" if ($narg>1);\n\t    $args .= \"\tmovq\t%r8,%rdx\\n\"  if ($narg>2);\n\t    $args .= \"\tmovq\t%r9,%rcx\\n\"  if ($narg>3);\n\t    $args .= \"\tmovq\t$arg5(%rsp),%r8\\n\" if ($narg>4);\n\t    $args .= \"\tmovq\t$arg6(%rsp),%r9\\n\" if ($narg>5);\n\t} else {\n\t    $args .= \"\tmov\trdi,rcx\\n\" if ($narg>0);\n\t    $args .= \"\tmov\trsi,rdx\\n\" if ($narg>1);\n\t    $args .= \"\tmov\trdx,r8\\n\"  if ($narg>2);\n\t    $args .= \"\tmov\trcx,r9\\n\"  if ($narg>3);\n\t    $args .= \"\tmov\tr8,QWORD$PTR\\[$arg5+rsp\\]\\n\" if ($narg>4);\n\t    $args .= \"\tmov\tr9,QWORD$PTR\\[$arg6+rsp\\]\\n\" if ($narg>5);\n\t}\n\t$current_function->{narg} = -1;\n\t$args;\n    }\n    sub out {\n\tmy $self = shift;\n\n\tif ($gas) {\n\t    my $func = ($globals{$self->{value}} or $self->{value}) . 
\":\";\n\t    if ($current_function->{name} eq $self->{value}) {\n\t\t$current_function->{pc} = 0;\n\t\t$func .= \"\\n.cfi_\".cfi_directive::startproc()   if ($dwarf);\n\t\t$func .= \"\\n\t.byte\t0xf3,0x0f,0x1e,0xfa\\n\";\t# endbranch\n\t\tif ($win64) {\n\t\t    if ($current_function->{abi} eq \"svr4\") {\n\t\t\tmy $fp = $current_function->{unwind} ? \"%r11\" : \"%rax\";\n\t\t\t$func .= \"\tmovq\t%rdi,8(%rsp)\\n\";\n\t\t\t$func .= \"\tmovq\t%rsi,16(%rsp)\\n\";\n\t\t\t$func .= \"\tmovq\t%rsp,$fp\\n\";\n\t\t\t$func .= \"${decor}SEH_begin_$current_function->{name}:\\n\";\n\t\t    } elsif ($current_function->{unwind}) {\n\t\t\t$func .= \"\tmovq\t%rsp,%r11\\n\";\n\t\t\t$func .= \"${decor}SEH_begin_$current_function->{name}:\\n\";\n\t\t    }\n\t\t}\n\t    } elsif ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t\t    && $current_function->{pc} >= 0) {\n\t\t$func = win64_args().$func;\n\t    }\n\t    $func;\n\t} elsif ($self->{value} ne \"$current_function->{name}\") {\n\t    my $func;\n\t    if ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t       && $current_function->{pc} >= 0) {\n\t\t$func = win64_args();\n\t    }\n\t    $func .= $self->{value} . $colon;\n\t    $func;\n\t} else {\n\t    $current_function->{pc} = 0;\n\t    my $func =\t\"$current_function->{name}\" .\n\t\t\t($nasm ? \":\" : \"\\tPROC $current_function->{scope}\") .\n\t\t\t\"\\n\";\n\t    $func .= \"\tDB\t243,15,30,250\\n\";\t# endbranch\n\t    if ($current_function->{abi} eq \"svr4\") {\n\t\tmy $fp = $current_function->{unwind} ? 
\"r11\" : \"rax\";\n\t\t$func .= \"\tmov\tQWORD$PTR\\[8+rsp\\],rdi\\t;WIN64 prologue\\n\";\n\t\t$func .= \"\tmov\tQWORD$PTR\\[16+rsp\\],rsi\\n\";\n\t\t$func .= \"\tmov\t$fp,rsp\\n\";\n\t\t$func .= \"${decor}SEH_begin_$current_function->{name}${colon}\\n\";\n\t    } elsif ($current_function->{unwind}) {\n\t\t$func .= \"\tmov\tr11,rsp\\n\";\n\t\t$func .= \"${decor}SEH_begin_$current_function->{name}${colon}\\n\";\n\t    }\n\t    $func;\n\t}\n    }\n}\n{ package expr;\t\t# pick up expressions\n    sub re {\n\tmy\t($class, $line, $opcode) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\tif ($$line =~ /(^[^,]+)/) {\n\t    bless $self,$class;\n\t    $self->{value} = $1;\n\t    $ret = $self;\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\n\t    $self->{value} =~ s/\\@PLT// if (!$elf);\n\t    $self->{value} =~ s/([_a-z][_a-z0-9\\$]*)/$globals{$1} or $1/gei;\n\t    if ($flavour eq \"macosx\" and $self->{value} !~ /\\.L/) {\n\t\t$self->{value} =~ s/(\\w+\\$\\w*)/$decor\\1/g;\n\t    }\n\t    $self->{value} =~ s/\\.L/$decor/g;\n\t    $self->{opcode} = $opcode;\n\t}\n\t$ret;\n    }\n    sub out {\n\tmy $self = shift;\n\t$self->{value};\n    }\n}\n\nmy @xdata_seg = (\".section\t.xdata\", \".align\t8\");\nmy @pdata_seg = (\".section\t.pdata\", \".align\t4\");\n\n{ package cfi_directive;\n    # CFI directives annotate instructions that are significant for\n    # stack unwinding procedure compliant with DWARF specification,\n    # see http://dwarfstd.org/. 
Besides naturally expected for this\n    # script platform-specific filtering function, this module adds\n    # four auxiliary synthetic directives not recognized by [GNU]\n    # assembler:\n    #\n    # - .cfi_push to annotate push instructions in prologue, which\n    #   translates to .cfi_adjust_cfa_offset (if needed) and\n    #   .cfi_offset;\n    # - .cfi_pop to annotate pop instructions in epilogue, which\n    #   translates to .cfi_adjust_cfa_offset (if needed) and\n    #   .cfi_restore;\n    # - .cfi_alloca to annotate stack pointer adjustments, which\n    #   translates to .cfi_adjust_cfa_offset as needed;\n    # - [and most notably] .cfi_cfa_expression which encodes\n    #   DW_CFA_def_cfa_expression and passes it to .cfi_escape as\n    #   byte vector;\n    #\n    # CFA expressions were introduced in DWARF specification version\n    # 3 and describe how to deduce CFA, Canonical Frame Address. This\n    # becomes handy if your stack frame is variable and you can't\n    # spare register for [previous] frame pointer. Suggested directive\n    # syntax is made-up mix of DWARF operator suffixes [subset of]\n    # and references to registers with optional bias. Following example\n    # describes offloaded *original* stack pointer at specific offset\n    # from *current* stack pointer:\n    #\n    #   .cfi_cfa_expression     %rsp+40,deref,+8\n    #\n    # Final +8 has everything to do with the fact that CFA is defined\n    # as reference to top of caller's stack, and on x86_64 call to\n    # subroutine pushes 8-byte return address. In other words original\n    # stack pointer upon entry to a subroutine is 8 bytes off from CFA.\n    #\n    # In addition the .cfi directives are re-purposed even for Win64\n    # stack unwinding. 
Two more synthetic directives were added:\n    #\n    # - .cfi_end_prologue to denote point when all non-volatile\n    #   registers are saved and stack or [chosen] frame pointer is\n    #   stable;\n    # - .cfi_epilogue to denote point when all non-volatile registers\n    #   are restored [and it even adds missing .cfi_restore-s];\n    #\n    # Though it's not universal \"miracle cure,\" it has its limitations.\n    # Most notably .cfi_cfa_expression won't start working... For more\n    # information see the end of this file.\n\n    # Below constants are taken from \"DWARF Expressions\" section of the\n    # DWARF specification, section is numbered 7.7 in versions 3 and 4.\n    my %DW_OP_simple = (\t# no-arg operators, mapped directly\n\tderef\t=> 0x06,\tdup\t=> 0x12,\n\tdrop\t=> 0x13,\tover\t=> 0x14,\n\tpick\t=> 0x15,\tswap\t=> 0x16,\n\trot\t=> 0x17,\txderef\t=> 0x18,\n\n\tabs\t=> 0x19,\tand\t=> 0x1a,\n\tdiv\t=> 0x1b,\tminus\t=> 0x1c,\n\tmod\t=> 0x1d,\tmul\t=> 0x1e,\n\tneg\t=> 0x1f,\tnot\t=> 0x20,\n\tor\t=> 0x21,\tplus\t=> 0x22,\n\tshl\t=> 0x24,\tshr\t=> 0x25,\n\tshra\t=> 0x26,\txor\t=> 0x27,\n\t);\n\n    my %DW_OP_complex = (\t# used in specific subroutines\n\tconstu\t\t=> 0x10,\t# uleb128\n\tconsts\t\t=> 0x11,\t# sleb128\n\tplus_uconst\t=> 0x23,\t# uleb128\n\tlit0 \t\t=> 0x30,\t# add 0-31 to opcode\n\treg0\t\t=> 0x50,\t# add 0-31 to opcode\n\tbreg0\t\t=> 0x70,\t# add 0-31 to opcole, sleb128\n\tregx\t\t=> 0x90,\t# uleb28\n\tfbreg\t\t=> 0x91,\t# sleb128\n\tbregx\t\t=> 0x92,\t# uleb128, sleb128\n\tpiece\t\t=> 0x93,\t# uleb128\n\t);\n\n    # Following constants are defined in x86_64 ABI supplement, for\n    # example available at https://www.uclibc.org/docs/psABI-x86_64.pdf,\n    # see section 3.7 \"Stack Unwind Algorithm\".\n    my %DW_reg_idx = (\n\t\"%rax\"=>0,  \"%rdx\"=>1,  \"%rcx\"=>2,  \"%rbx\"=>3,\n\t\"%rsi\"=>4,  \"%rdi\"=>5,  \"%rbp\"=>6,  \"%rsp\"=>7,\n\t\"%r8\" =>8,  \"%r9\" =>9,  \"%r10\"=>10, \"%r11\"=>11,\n\t\"%r12\"=>12, \"%r13\"=>13, \"%r14\"=>14, 
\"%r15\"=>15\n\t);\n\n    my ($cfa_reg, $cfa_off, $cfa_rsp, %saved_regs);\n    my @cfa_stack;\n\n    sub cfa_rsp { return $cfa_rsp // -8;  }\n\n    # [us]leb128 format is variable-length integer representation base\n    # 2^128, with most significant bit of each byte being 0 denoting\n    # *last* most significant digit. See \"Variable Length Data\" in the\n    # DWARF specification, numbered 7.6 at least in versions 3 and 4.\n    sub sleb128 {\n\tuse integer;\t# get right shift extend sign\n\n\tmy $val = shift;\n\tmy $sign = ($val < 0) ? -1 : 0;\n\tmy @ret = ();\n\n\twhile(1) {\n\t    push @ret, $val&0x7f;\n\n\t    # see if remaining bits are same and equal to most\n\t    # significant bit of the current digit, if so, it's\n\t    # last digit...\n\t    last if (($val>>6) == $sign);\n\n\t    @ret[-1] |= 0x80;\n\t    $val >>= 7;\n\t}\n\n\treturn @ret;\n    }\n    sub uleb128 {\n\tmy $val = shift;\n\tmy @ret = ();\n\n\twhile(1) {\n\t    push @ret, $val&0x7f;\n\n\t    # see if it's last significant digit...\n\t    last if (($val >>= 7) == 0);\n\n\t    @ret[-1] |= 0x80;\n\t}\n\n\treturn @ret;\n    }\n    sub const {\n\tmy $val = shift;\n\n\tif ($val >= 0 && $val < 32) {\n\t    return ($DW_OP_complex{lit0}+$val);\n\t}\n\treturn ($DW_OP_complex{consts}, sleb128($val));\n    }\n    sub reg {\n\tmy $val = shift;\n\n\treturn if ($val !~ m/^(%r\\w+)(?:([\\+\\-])((?:0x)?[0-9a-f]+))?/);\n\n\tmy $reg = $DW_reg_idx{$1};\n\tmy $off = eval (\"0 $2 $3\");\n\n\treturn (($DW_OP_complex{breg0} + $reg), sleb128($off));\n\t# Yes, we use DW_OP_bregX+0 to push register value and not\n\t# DW_OP_regX, because latter would require even DW_OP_piece,\n\t# which would be a waste under the circumstances. 
If you have\n\t# to use DWP_OP_reg, use \"regx:N\"...\n    }\n    sub cfa_expression {\n\tmy $line = shift;\n\tmy @ret;\n\n\tforeach my $token (split(/,\\s*/,$line)) {\n\t    if ($token =~ /^%r/) {\n\t\tpush @ret,reg($token);\n\t    } elsif ($token =~ /((?:0x)?[0-9a-f]+)\\((%r\\w+)\\)/) {\n\t\tpush @ret,reg(\"$2+$1\");\n\t    } elsif ($token =~ /(\\w+):(\\-?(?:0x)?[0-9a-f]+)(U?)/i) {\n\t\tmy $i = 1*eval($2);\n\t\tpush @ret,$DW_OP_complex{$1}, ($3 ? uleb128($i) : sleb128($i));\n\t    } elsif (my $i = 1*eval($token) or $token eq \"0\") {\n\t\tif ($token =~ /^\\+/) {\n\t\t    push @ret,$DW_OP_complex{plus_uconst},uleb128($i);\n\t\t} else {\n\t\t    push @ret,const($i);\n\t\t}\n\t    } else {\n\t\tpush @ret,$DW_OP_simple{$token};\n\t    }\n\t}\n\n\t# Finally we return DW_CFA_def_cfa_expression, 15, followed by\n\t# length of the expression and of course the expression itself.\n\treturn (15,scalar(@ret),@ret);\n    }\n\n    # Following constants are defined in \"x64 exception handling\" at\n    # https://docs.microsoft.com/ and match the register sequence in\n    # CONTEXT structure defined in winnt.h.\n    my %WIN64_reg_idx = (\n\t\"%rax\"=>0,  \"%rcx\"=>1,  \"%rdx\"=>2,  \"%rbx\"=>3,\n\t\"%rsp\"=>4,  \"%rbp\"=>5,  \"%rsi\"=>6,  \"%rdi\"=>7,\n\t\"%r8\" =>8,  \"%r9\" =>9,  \"%r10\"=>10, \"%r11\"=>11,\n\t\"%r12\"=>12, \"%r13\"=>13, \"%r14\"=>14, \"%r15\"=>15\n\t);\n    sub xdata {\n\tour @dat = ();\n\tour $len = 0;\n\n\tsub savereg {\n\t    my ($key, $offset) = @_;\n\n\t    if ($key =~ /%xmm([0-9]+)/) {\n\t\tif ($offset < 0x100000) {\n\t\t    push @dat, [0,($1<<4)|8,unpack(\"C2\",pack(\"v\",$offset>>4))];\n\t\t} else {\n\t\t    push @dat, [0,($1<<4)|9,unpack(\"C4\",pack(\"V\",$offset))];\n\t\t}\n\t    } else {\n\t\tif ($offset < 0x80000) {\n\t\t    push @dat, [0,(($WIN64_reg_idx{$key})<<4)|4,\n\t\t\t\tunpack(\"C2\",pack(\"v\",$offset>>3))];\n\t\t} else {\n\t\t    push @dat, [0,(($WIN64_reg_idx{$key})<<4)|5,\n\t\t\t\tunpack(\"C4\",pack(\"V\",$offset))];\n\t\t}\n\t    
}\n\t    $len += $#{@dat[-1]}+1;\n\t}\n\n\tmy $fp_info = 0;\n\n\t# allocate stack frame\n\tif ($cfa_rsp < -8) {\n\t    my $offset = -8 - $cfa_rsp;\n\t    if ($cfa_reg ne \"%rsp\" && $saved_regs{$cfa_reg} == -16) {\n\t\t$fp_info = $WIN64_reg_idx{$cfa_reg};\n\t\tpush @dat, [0,$fp_info<<4];\t\t# UWOP_PUSH_NONVOL\n\t\t$len += $#{@dat[-1]}+1;\n\t\t$offset -= 8;\n\t    }\n\t    if ($offset <= 128) {\n\t\tmy $alloc = ($offset - 8) >> 3;\n\t\tpush @dat, [0,$alloc<<4|2];\t\t# UWOP_ALLOC_SMALL\n\t    } elsif ($offset < 0x80000) {\n\t\tpush @dat, [0,0x01,unpack(\"C2\",pack(\"v\",$offset>>3))];\n\t    } else {\n\t\tpush @dat, [0,0x11,unpack(\"C4\",pack(\"V\",$offset))];\n\t    }\n\t    $len += $#{@dat[-1]}+1;\n\t}\n\n\t# save frame pointer [if not pushed already]\n\tif ($cfa_reg ne \"%rsp\" && $fp_info == 0) {\n\t    $fp_info = $WIN64_reg_idx{$cfa_reg};\n\t    if (defined(my $offset = $saved_regs{$cfa_reg})) {\n\t\t$offset -= $cfa_rsp;\n\t\tsavereg($cfa_reg, $offset);\n\t    }\n\t}\n\n\t# set up frame pointer\n\tif ($fp_info) {\n\t    push @dat, [0,($fp_info<<4)|3];\t\t# UWOP_SET_FPREG\n\t    $len += $#{@dat[-1]}+1;\n\t    my $fp_off = $cfa_off - $cfa_rsp;\n\t    ($fp_off > 240 or $fp_off&0xf) and die \"invalid FP offset $fp_off\";\n\t    $fp_info |= $fp_off&-16;\n\t}\n\n\t# save registers\n\tforeach my $key (sort { $saved_regs{$b} <=> $saved_regs{$a} }\n\t\t\t      keys(%saved_regs)) {\n\t    next if ($cfa_reg ne \"%rsp\" && $cfa_reg eq $key);\n\t    my $offset = $saved_regs{$key} - $cfa_rsp;\n\t    savereg($key, $offset);\n\t}\n\n\tmy @ret;\n\t# generate 4-byte descriptor\n\tpush @ret, \".byte\t1,0,\".($len/2).\",$fp_info\";\n\t$len += 4;\n\t# keep objdump happy, pad to 4*n and add a 32-bit zero\n\tunshift @dat, [(0)x(((-$len)&3)+4)];\n\t$len += $#{@dat[0]}+1;\n\t# pad to 8*n\n\tunshift @dat, [(0)x((-$len)&7)] if ($len&7);\n\t# emit data\n\twhile(defined(my $row = pop @dat)) {\n\t    push @ret, \".byte\t\". 
join(\",\",\n\t\t\t\t\tmap { sprintf \"0x%02x\",$_ } @{$row});\n\t}\n\n\treturn @ret;\n    }\n    sub startproc {\n\treturn if ($cfa_rsp == -8);\n\t($cfa_reg, $cfa_off, $cfa_rsp) = (\"%rsp\", -8, -8);\n\t%saved_regs = ();\n\treturn \"startproc\";\n    }\n    sub endproc {\n\treturn if ($cfa_rsp == 0);\n\t($cfa_reg, $cfa_off, $cfa_rsp) = (\"%rsp\", 0, 0);\n\t%saved_regs = ();\n\treturn \"endproc\";\n    }\n    sub re {\n\tmy\t($class, $line) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\n\tif ($$line =~ s/^\\s*\\.cfi_(\\w+)\\s*//) {\n\t    bless $self,$class;\n\t    $ret = $self;\n\t    undef $self->{value};\n\t    my $dir = $1;\n\n\t    SWITCH: for ($dir) {\n\t    # What is $cfa_rsp? Effectively it's difference between %rsp\n\t    # value and current CFA, Canonical Frame Address, which is\n\t    # why it starts with -8. Recall that CFA is top of caller's\n\t    # stack...\n\t    /startproc/\t&& do {\t$dir = startproc(); last; };\n\t    /endproc/\t&& do {\t$dir = endproc();\n\t\t\t\t# .cfi_remember_state directives that are not\n\t\t\t\t# matched with .cfi_restore_state are\n\t\t\t\t# unnecessary.\n\t\t\t\tdie \"unpaired .cfi_remember_state\" if (@cfa_stack);\n\t\t\t\tlast;\n\t\t\t      };\n\t    /def_cfa_register/\n\t\t\t&& do {\t$cfa_off = $cfa_rsp if ($cfa_reg eq \"%rsp\");\n\t\t\t\t$cfa_reg = $$line;\n\t\t\t\t$cfa_rsp = $cfa_off if ($cfa_reg eq \"%rsp\");\n\t\t\t\tlast;\n\t\t\t      };\n\t    /def_cfa_offset/\n\t\t\t&& do {\t$cfa_off = -1*eval($$line);\n\t\t\t\t$cfa_rsp = $cfa_off if ($cfa_reg eq \"%rsp\");\n\t\t\t\tlast;\n\t\t\t      };\n\t    /adjust_cfa_offset/\n\t\t\t&& do { my $val = 1*eval($$line);\n\t\t\t\t$cfa_off -= $val;\n\t\t\t\tif ($cfa_reg eq \"%rsp\") {\n\t\t\t\t    $cfa_rsp -= $val;\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t    /alloca/\t&& do { $dir = undef;\n\t\t\t\tmy $val = 1*eval($$line);\n\t\t\t\t$cfa_rsp -= $val;\n\t\t\t\tif ($cfa_reg eq \"%rsp\") {\n\t\t\t\t    $cfa_off -= $val;\n\t\t\t\t    $dir = 
\"adjust_cfa_offset\";\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t    /def_cfa/\t&& do {\tif ($$line =~ /(%r\\w+)\\s*(?:,\\s*(.+))?/) {\n\t\t\t\t    $cfa_reg = $1;\n\t\t\t\t    if ($cfa_reg eq \"%rsp\" && !defined($2)) {\n\t\t\t\t\t$cfa_off = $cfa_rsp;\n\t\t\t\t\t$$line .= \",\".(-$cfa_rsp);\n\t\t\t\t    } else {\n\t\t\t\t\t$cfa_off = -1*eval($2);\n\t\t\t\t\t$cfa_rsp = $cfa_off if ($cfa_reg eq \"%rsp\");\n\t\t\t\t    }\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t    /push/\t&& do {\t$dir = undef;\n\t\t\t\t$cfa_rsp -= 8;\n\t\t\t\tif ($cfa_reg eq \"%rsp\") {\n\t\t\t\t    $cfa_off = $cfa_rsp;\n\t\t\t\t    $self->{value} = \".cfi_adjust_cfa_offset\\t8\\n\";\n\t\t\t\t}\n\t\t\t\t$saved_regs{$$line} = $cfa_rsp;\n\t\t\t\t$self->{value} .= \".cfi_offset\\t$$line,$cfa_rsp\";\n\t\t\t\tlast;\n\t\t\t      };\n\t    /pop/\t&& do {\t$dir = undef;\n\t\t\t\t$cfa_rsp += 8;\n\t\t\t\tif ($cfa_reg eq \"%rsp\") {\n\t\t\t\t    $cfa_off = $cfa_rsp;\n\t\t\t\t    $self->{value} = \".cfi_adjust_cfa_offset\\t-8\\n\";\n\t\t\t\t}\n\t\t\t\t$self->{value} .= \".cfi_restore\\t$$line\";\n\t\t\t\tdelete $saved_regs{$$line};\n\t\t\t\tlast;\n\t\t\t      };\n\t    /cfa_expression/\n\t\t\t&& do {\t$dir = undef;\n\t\t\t\t$self->{value} = \".cfi_escape\\t\" .\n\t\t\t\t\tjoin(\",\", map(sprintf(\"0x%02x\", $_),\n\t\t\t\t\t\t      cfa_expression($$line)));\n\t\t\t\tlast;\n\t\t\t      };\n\t    /remember_state/\n\t\t\t&& do {\tpush @cfa_stack,\n\t\t\t\t     [$cfa_reg,$cfa_off,$cfa_rsp,%saved_regs];\n\t\t\t\tlast;\n\t\t\t      };\n\t    /restore_state/\n\t\t\t&& do {\t     ($cfa_reg,$cfa_off,$cfa_rsp,%saved_regs)\n\t\t\t\t= @{pop @cfa_stack};\n\t\t\t\tlast;\n\t\t\t      };\n\t    /offset/\t&& do { if ($$line =~ /(%\\w+)(?:-%xmm(\\d+))?\\s*,\\s*(.+)/) {\n\t\t\t\t    my ($reg, $off, $xmmlast) = ($1, 1*eval($3), $2);\n\t\t\t\t    if ($reg !~ /%xmm(\\d+)/) {\n\t\t\t\t\t$saved_regs{$reg} = $off;\n\t\t\t\t    } else {\n\t\t\t\t\t$dir = undef;\n\t\t\t\t\t$xmmlast //= $1;\n\t\t\t\t\tfor (my $i=$1; $i<=$xmmlast; 
$i++) {\n\t\t\t\t\t    $saved_regs{\"%xmm$i\"} = $off;\n\t\t\t\t\t    $off += 16;\n\t\t\t\t\t}\n\t\t\t\t    }\n\t\t\t\t}\n\t\t\t\tlast;\n\t\t\t      };\n\t    /restore/\t&& do {\tdelete $saved_regs{$$line}; last; };\n\t    /end_prologue/\n\t\t\t&& do {\t$dir = undef;\n\t\t\t\t$self->{win64} = \".endprolog\";\n\t\t\t\tlast;\n\t\t\t      };\n\t    /epilogue/\t&& do {\t$dir = undef;\n\t\t\t\t$self->{win64} = \".epilogue\";\n\t\t\t\t$self->{value} = join(\"\\n\",\n\t\t\t\t\t\t      map { \".cfi_restore\\t$_\" }\n\t\t\t\t\t\t      sort keys(%saved_regs));\n\t\t\t\t%saved_regs = ();\n\t\t\t\tlast;\n\t\t\t      };\n\t    }\n\n\t    $self->{value} = \".cfi_$dir\\t$$line\" if ($dir);\n\n\t    $$line = \"\";\n\t}\n\n\treturn $ret;\n    }\n    sub out {\n\tmy $self = shift;\n\treturn $self->{value} if ($dwarf);\n\n\tif ($win64 and $current_function->{unwind}\n\t\t   and my $ret = $self->{win64}) {\n\t    my ($reg, $off) = ($cfa_reg =~ /%(?!rsp)/)  ? ($',    $cfa_off)\n\t\t\t\t\t\t\t: (\"rsp\", $cfa_rsp);\n\t    my $fname = $current_function->{name};\n\n\t    if ($ret eq \".endprolog\") {\n\t\t$ret = \"\";\n\t\tif ($current_function->{abi} eq \"svr4\") {\n\t\t    $ret .= label::win64_args();\n\t\t    $saved_regs{\"%rdi\"} = 0;\t# relative to CFA, remember?\n\t\t    $saved_regs{\"%rsi\"} = 8;\n\t\t}\n\n\t\tpush @pdata_seg,\n\t\t    \".rva\t.LSEH_begin_${fname}\",\n\t\t    \".rva\t.LSEH_body_${fname}\",\n\t\t    \".rva\t.LSEH_info_${fname}_prologue\",\"\";\n\t\tpush @xdata_seg,\n\t\t    \".LSEH_info_${fname}_prologue:\";\n\t\tif ($current_function->{unwind} eq \"%rbp\") {\n\t\t    if ($current_function->{abi} eq \"svr4\") {\n\t\t\tpush @xdata_seg,\n\t\t\t\".byte\t1,4,6,0x05\",\t# 6 unwind codes, %rbp is FP\n\t\t\t\".byte\t4,0x74,2,0\",\t# %rdi at 16(%rsp)\n\t\t\t\".byte\t4,0x64,3,0\",\t# %rsi at 24(%rsp)\n\t\t\t\".byte\t4,0x53\",\t# mov\t%rsp, %rbp\n\t\t\t\".byte\t1,0x50\",\t# push\t%rbp\n\t\t\t\".long\t0,0\"\t\t# pad to keep objdump happy\n\t\t\t;\n\t\t    } else {\n\t\t\tpush 
@xdata_seg,\n\t\t\t\".byte\t1,4,2,0x05\",\t# 2 unwind codes, %rbp is FP\n\t\t\t\".byte\t4,0x53\",\t# mov\t%rsp, %rbp\n\t\t\t\".byte\t1,0x50\",\t# push\t%rbp\n\t\t\t\".long\t0,0\"\t\t# pad to keep objdump happy\n\t\t\t;\n\t\t    }\n\t\t} else {\n\t\t    if ($current_function->{abi} eq \"svr4\") {\n\t\t\tpush @xdata_seg,\n\t\t\t\".byte\t1,0,5,0x0b\",\t# 5 unwind codes, %r11 is FP\n\t\t\t\".byte\t0,0x74,1,0\",\t# %rdi at 8(%rsp)\n\t\t\t\".byte\t0,0x64,2,0\",\t# %rsi at 16(%rsp)\n\t\t\t\".byte\t0,0xb3\",\t# set frame pointer\n\t\t\t\".byte\t0,0\",\t\t# padding\n\t\t\t\".long\t0,0\"\t\t# pad to keep objdump happy\n\t\t\t;\n\t\t    } else {\n\t\t\tpush @xdata_seg,\n\t\t\t\".byte\t1,0,1,0x0b\",\t# 1 unwind code, %r11 is FP\n\t\t\t\".byte\t0,0xb3\",\t# set frame pointer\n\t\t\t\".byte\t0,0\",\t\t# padding\n\t\t\t\".long\t0,0\"\t\t# pad to keep objdump happy\n\t\t\t;\n\t\t    }\n\t\t}\n\t\tpush @pdata_seg,\n\t\t    \".rva\t.LSEH_body_${fname}\",\n\t\t    \".rva\t.LSEH_epilogue_${fname}\",\n\t\t    \".rva\t.LSEH_info_${fname}_body\",\"\";\n\t\tpush @xdata_seg,\".LSEH_info_${fname}_body:\", xdata();\n\t\t$ret .= \"${decor}SEH_body_${fname}${colon}\\n\";\n\t    } elsif ($ret eq \".epilogue\") {\n\t\t%saved_regs = ();\n\t\t$cfa_rsp = $cfa_off;\n\t\t$ret = \"${decor}SEH_epilogue_${fname}${colon}\\n\";\n\t\tif ($current_function->{abi} eq \"svr4\") {\n\t\t    $saved_regs{\"%rdi\"} = 0;\t# relative to CFA, remember?\n\t\t    $saved_regs{\"%rsi\"} = 8;\n\n\t\t    push @pdata_seg,\n\t\t\t\".rva\t.LSEH_epilogue_${fname}\",\n\t\t\t\".rva\t.LSEH_end_${fname}\",\n\t\t\t\".rva\t.LSEH_info_${fname}_epilogue\",\"\";\n\t\t    push @xdata_seg,\".LSEH_info_${fname}_epilogue:\", xdata(), \"\";\n\t\t    if ($gas) {\n\t\t\t$ret .= \"\tmov\t\".(0-$off).\"(%$reg),%rdi\\n\";\n\t\t\t$ret .= \"\tmov\t\".(8-$off).\"(%$reg),%rsi\\n\";\n\t\t    } else {\n\t\t\t$ret .= \"\tmov\trdi,QWORD$PTR\\[\".(0-$off).\"+$reg\\]\";\n\t\t\t$ret .= \"\t;WIN64 epilogue\\n\";\n\t\t\t$ret .= 
\"\tmov\trsi,QWORD$PTR\\[\".(8-$off).\"+$reg\\]\\n\";\n\t\t    }\n\t\t}\n\t    }\n\t    return $ret;\n\t}\n\treturn;\n    }\n}\n{ package directive;\t# pick up directives, which start with .\n    sub re {\n\tmy\t($class, $line) = @_;\n\tmy\t$self = {};\n\tmy\t$ret;\n\tmy\t$dir;\n\n\t# chain-call to cfi_directive\n\t$ret = cfi_directive->re($line) and return $ret;\n\n\tif ($$line =~ /^\\s*(\\.\\w+)/) {\n\t    bless $self,$class;\n\t    $dir = $1;\n\t    $ret = $self;\n\t    undef $self->{value};\n\t    $$line = substr($$line,@+[0]); $$line =~ s/^\\s+//;\n\n\t    SWITCH: for ($dir) {\n\t\t/\\.global|\\.globl|\\.extern|\\.comm/\n\t\t\t    && do { $$line =~ s/([_a-z][_a-z0-9\\$]*)/$prefix\\1/gi;\n\t\t\t\t    $globals{$1} = $prefix.$1 if ($1);\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.type/    && do { my ($sym,$type,$narg,$unwind) = split(',',$$line);\n\t\t\t\t    if ($type eq \"\\@function\") {\n\t\t\t\t\tundef $current_function;\n\t\t\t\t\t$current_function->{name} = $sym;\n\t\t\t\t\t$current_function->{abi}  = \"svr4\";\n\t\t\t\t\t$current_function->{narg} = $narg;\n\t\t\t\t\t$current_function->{scope} = defined($globals{$sym})?\"PUBLIC\":\"PRIVATE\";\n\t\t\t\t\t$current_function->{unwind} = $unwind;\n\t\t\t\t\t$current_function->{pc} = -1;\n\t\t\t\t    } elsif ($type eq \"\\@abi-omnipotent\") {\n\t\t\t\t\tundef $current_function;\n\t\t\t\t\t$current_function->{name} = $sym;\n\t\t\t\t\t$current_function->{scope} = defined($globals{$sym})?\"PUBLIC\":\"PRIVATE\";\n\t\t\t\t\t$current_function->{unwind} = $unwind;\n\t\t\t\t\t$current_function->{pc} = -1;\n\t\t\t\t    }\n\t\t\t\t    $$line =~ s/\\@abi\\-omnipotent/\\@function/;\n\t\t\t\t    $$line =~ s/\\@function.*/\\@function/;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.asciz/   && do { if ($$line =~ /^\"(.*)\"$/) {\n\t\t\t\t\t$dir  = \".byte\";\n\t\t\t\t\t$$line = join(\",\",unpack(\"C*\",$1),0);\n\t\t\t\t    }\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.rva|\\.long|\\.quad/\n\t\t\t    && do { $$line =~ 
s/([_a-z][_a-z0-9\\$]*)/$globals{$1} or $1/gei;\n\t\t\t\t    $$line =~ s/\\.L/$decor/g;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t    }\n\n\t    if ($gas) {\n\t\t$self->{value} = $dir . \"\\t\" . $$line;\n\n\t\tif ($dir =~ /\\.extern/) {\n\t\t    $self->{value} = \"\"; # swallow extern\n\t\t} elsif (!$elf && $dir =~ /\\.type/) {\n\t\t    $self->{value} = \"\";\n\t\t    $self->{value} = \".def\\t\" . ($globals{$1} or $1) . \";\\t\" .\n\t\t\t\t(defined($globals{$1})?\".scl 2;\":\".scl 3;\") .\n\t\t\t\t\"\\t.type 32;\\t.endef\"\n\t\t\t\tif ($win64 && $$line =~ /([^,]+),\\@function/);\n\t\t} elsif ($dir =~ /\\.size/) {\n\t\t    $self->{value} = \"\" if (!$elf);\n\t\t    if ($dwarf and my $endproc = cfi_directive::endproc()) {\n\t\t\t$self->{value} = \".cfi_$endproc\\n$self->{value}\";\n\t\t    } elsif (!$elf && defined($current_function)) {\n\t\t\t$self->{value} .= \"${decor}SEH_end_$current_function->{name}:\"\n\t\t\t\tif ($win64 && $current_function->{abi} eq \"svr4\");\n\t\t\tundef $current_function;\n\t\t    }\n\t\t} elsif (!$elf && $dir =~ /\\.align/) {\n\t\t    $self->{value} = \".p2align\\t\" . 
(log($$line)/log(2));\n\t\t} elsif ($dir eq \".section\") {\n\t\t    $current_segment=$$line;\n\t\t    if (!$elf && $current_segment eq \".init\") {\n\t\t\tif\t($flavour eq \"macosx\")\t{ $self->{value} = \".mod_init_func\"; }\n\t\t\telsif\t($flavour eq \"mingw64\")\t{ $self->{value} = \".section\\t.ctors\"; }\n\t\t    }\n\t\t    if (!$elf && $current_segment eq \".rodata\") {\n\t\t\tif\t($flavour eq \"macosx\")\t{ $self->{value} = \".section\\t__TEXT,__const\"; }\n\t\t\telsif\t($flavour eq \"mingw64\")\t{ $self->{value} = \".section\\t.rdata\"; }\n\t\t    }\n\t\t} elsif ($dir =~ /\\.(text|data)/) {\n\t\t    $current_segment=\".$1\";\n\t\t} elsif ($dir =~ /\\.hidden/) {\n\t\t    if    ($flavour eq \"macosx\")  { $self->{value} = \".private_extern\\t$prefix$$line\"; }\n\t\t    elsif ($flavour eq \"mingw64\") { $self->{value} = \"\"; }\n\t\t} elsif ($dir =~ /\\.comm/) {\n\t\t    $self->{value} = \"$dir\\t$$line\";\n\t\t    $self->{value} =~ s|,([0-9]+),([0-9]+)$|\",$1,\".log($2)/log(2)|e if ($flavour eq \"macosx\");\n\t\t}\n\t\t$$line = \"\";\n\t\treturn $self;\n\t    }\n\n\t    # non-gas case or nasm/masm\n\t    SWITCH: for ($dir) {\n\t\t/\\.text/    && do { my $v=undef;\n\t\t\t\t    if ($nasm) {\n\t\t\t\t\t$v=\"section\t.text code align=64\\n\";\n\t\t\t\t    } else {\n\t\t\t\t\t$v=\"$current_segment\\tENDS\\n\" if ($current_segment);\n\t\t\t\t\t$current_segment = \".text\\$\";\n\t\t\t\t\t$v.=\"$current_segment\\tSEGMENT \";\n\t\t\t\t\t$v.=$masm>=$masmref ? 
\"ALIGN(256)\" : \"PAGE\";\n\t\t\t\t\t$v.=\" 'CODE'\";\n\t\t\t\t    }\n\t\t\t\t    $self->{value} = $v;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.data/    && do { my $v=undef;\n\t\t\t\t    if ($nasm) {\n\t\t\t\t\t$v=\"section\t.data data align=8\\n\";\n\t\t\t\t    } else {\n\t\t\t\t\t$v=\"$current_segment\\tENDS\\n\" if ($current_segment);\n\t\t\t\t\t$current_segment = \"_DATA\";\n\t\t\t\t\t$v.=\"$current_segment\\tSEGMENT\";\n\t\t\t\t    }\n\t\t\t\t    $self->{value} = $v;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.section/ && do { my $v=undef;\n\t\t\t\t    $$line =~ s/([^,]*).*/$1/;\n\t\t\t\t    $$line = \".CRT\\$XCU\" if ($$line eq \".init\");\n\t\t\t\t    $$line = \".rdata\" if ($$line eq \".rodata\");\n\t\t\t\t    my %align = ( p=>4, x=>8, r=>256);\n\t\t\t\t    if ($nasm) {\n\t\t\t\t\t$v=\"section\t$$line\";\n\t\t\t\t\tif ($$line=~/\\.([pxr])data/) {\n\t\t\t\t\t    $v.=\" rdata align=$align{$1}\";\n\t\t\t\t\t} elsif ($$line=~/\\.CRT\\$/i) {\n\t\t\t\t\t    $v.=\" rdata align=8\";\n\t\t\t\t\t}\n\t\t\t\t    } else {\n\t\t\t\t\t$v=\"$current_segment\\tENDS\\n\" if ($current_segment);\n\t\t\t\t\t$v.=\"$$line\\tSEGMENT\";\n\t\t\t\t\tif ($$line=~/\\.([pxr])data/) {\n\t\t\t\t\t    $v.=\" READONLY\";\n\t\t\t\t\t    $v.=\" ALIGN($align{$1})\" if ($masm>=$masmref);\n\t\t\t\t\t} elsif ($$line=~/\\.CRT\\$/i) {\n\t\t\t\t\t    $v.=\" READONLY \";\n\t\t\t\t\t    $v.=$masm>=$masmref ? 
\"ALIGN(8)\" : \"DWORD\";\n\t\t\t\t\t}\n\t\t\t\t    }\n\t\t\t\t    $current_segment = $$line;\n\t\t\t\t    $self->{value} = $v;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.extern/  && do { $self->{value}  = \"EXTERN\\t\".$$line;\n\t\t\t\t    $self->{value} .= \":NEAR\" if ($masm);\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.globl|.global/\n\t\t\t    && do { $self->{value}  = $masm?\"PUBLIC\":\"global\";\n\t\t\t\t    $self->{value} .= \"\\t\".$$line;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.size/    && do { if (defined($current_function)) {\n\t\t\t\t\tundef $self->{value};\n\t\t\t\t\tif ($current_function->{abi} eq \"svr4\") {\n\t\t\t\t\t    $self->{value}=\"${decor}SEH_end_$current_function->{name}${colon}\\n\";\n\t\t\t\t\t}\n\t\t\t\t\t$self->{value}.=\"$current_function->{name}\\tENDP\" if($masm && $current_function->{name});\n\t\t\t\t\tundef $current_function;\n\t\t\t\t    }\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.align/   && do { my $max = ($masm && $masm>=$masmref) ? 256 : 4096;\n\t\t\t\t    $self->{value} = \"ALIGN\\t\".($$line>$max?$max:$$line);\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.(value|long|rva|quad)/\n\t\t\t    && do { my $sz  = substr($1,0,1);\n\t\t\t\t    my @arr = split(/,\\s*/,$$line);\n\t\t\t\t    my $last = pop(@arr);\n\t\t\t\t    my $conv = sub  {\tmy $var=shift;\n\t\t\t\t\t\t\t$var=~s/^(0b[0-1]+)/oct($1)/eig;\n\t\t\t\t\t\t\t$var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm);\n\t\t\t\t\t\t\tif ($sz eq \"D\" && ($current_segment=~/.[px]data/ || $dir eq \".rva\"))\n\t\t\t\t\t\t\t{ $var=~s/^([_a-z\\$\\@][_a-z0-9\\$\\@]*)/$nasm?\"$1 wrt ..imagebase\":\"imagerel $1\"/egi; }\n\t\t\t\t\t\t\t$var;\n\t\t\t\t\t\t    };\n\n\t\t\t\t    $sz =~ tr/bvlrq/BWDDQ/;\n\t\t\t\t    $self->{value} = \"\\tD$sz\\t\";\n\t\t\t\t    for (@arr) { $self->{value} .= &$conv($_).\",\"; }\n\t\t\t\t    $self->{value} .= &$conv($last);\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.byte/    && do { my @str=split(/,\\s*/,$$line);\n\t\t\t\t    map(s/(0b[0-1]+)/oct($1)/eig,@str);\n\t\t\t\t    
map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm);\n\t\t\t\t    while ($#str>15) {\n\t\t\t\t\t$self->{value}.=\"DB\\t\"\n\t\t\t\t\t\t.join(\",\",@str[0..15]).\"\\n\";\n\t\t\t\t\tforeach (0..15) { shift @str; }\n\t\t\t\t    }\n\t\t\t\t    $self->{value}.=\"DB\\t\"\n\t\t\t\t\t\t.join(\",\",@str) if (@str);\n\t\t\t\t    last;\n\t\t\t\t  };\n\t\t/\\.comm/    && do { my @str=split(/,\\s*/,$$line);\n\t\t\t\t    my $v=undef;\n\t\t\t\t    if ($nasm) {\n\t\t\t\t\t$v.=\"common\t$prefix@str[0] @str[1]\";\n\t\t\t\t    } else {\n\t\t\t\t\t$v=\"$current_segment\\tENDS\\n\" if ($current_segment);\n\t\t\t\t\t$current_segment = \"_DATA\";\n\t\t\t\t\t$v.=\"$current_segment\\tSEGMENT\\n\";\n\t\t\t\t\t$v.=\"COMM\t@str[0]:DWORD:\".@str[1]/4;\n\t\t\t\t    }\n\t\t\t\t    $self->{value} = $v;\n\t\t\t\t    last;\n\t\t\t\t  };\n\t    }\n\t    $$line = \"\";\n\t}\n\n\t$ret;\n    }\n    sub out {\n\tmy $self = shift;\n\t$self->{value};\n    }\n}\n\n# Upon initial x86_64 introduction SSE>2 extensions were not introduced\n# yet. In order not to be bothered by tracing exact assembler versions,\n# but at the same time to provide a bare security minimum of AES-NI, we\n# hard-code some instructions. 
Extensions past AES-NI on the other hand\n# are traced by examining assembler version in individual perlasm\n# modules...\n\nmy %regrm = (\t\"%eax\"=>0, \"%ecx\"=>1, \"%edx\"=>2, \"%ebx\"=>3,\n\t\t\"%esp\"=>4, \"%ebp\"=>5, \"%esi\"=>6, \"%edi\"=>7\t);\n\nsub rex {\n my $opcode=shift;\n my ($dst,$src,$rex)=@_;\n\n   $rex|=0x04 if($dst>=8);\n   $rex|=0x01 if($src>=8);\n   push @$opcode,($rex|0x40) if ($rex);\n}\n\nmy $movq = sub {\t# elderly gas can't handle inter-register movq\n  my $arg = shift;\n  my @opcode=(0x66);\n    if ($arg =~ /%xmm([0-9]+),\\s*%r(\\w+)/) {\n\tmy ($src,$dst)=($1,$2);\n\tif ($dst !~ /[0-9]+/)\t{ $dst = $regrm{\"%e$dst\"}; }\n\trex(\\@opcode,$src,$dst,0x8);\n\tpush @opcode,0x0f,0x7e;\n\tpush @opcode,0xc0|(($src&7)<<3)|($dst&7);\t# ModR/M\n\t@opcode;\n    } elsif ($arg =~ /%r(\\w+),\\s*%xmm([0-9]+)/) {\n\tmy ($src,$dst)=($2,$1);\n\tif ($dst !~ /[0-9]+/)\t{ $dst = $regrm{\"%e$dst\"}; }\n\trex(\\@opcode,$src,$dst,0x8);\n\tpush @opcode,0x0f,0x6e;\n\tpush @opcode,0xc0|(($src&7)<<3)|($dst&7);\t# ModR/M\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $pextrd = sub {\n    if (shift =~ /\\$([0-9]+),\\s*%xmm([0-9]+),\\s*(%\\w+)/) {\n      my @opcode=(0x66);\n\tmy $imm=$1;\n\tmy $src=$2;\n\tmy $dst=$3;\n\tif ($dst =~ /%r([0-9]+)d/)\t{ $dst = $1; }\n\telsif ($dst =~ /%e/)\t\t{ $dst = $regrm{$dst}; }\n\trex(\\@opcode,$src,$dst);\n\tpush @opcode,0x0f,0x3a,0x16;\n\tpush @opcode,0xc0|(($src&7)<<3)|($dst&7);\t# ModR/M\n\tpush @opcode,$imm;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $pinsrd = sub {\n    if (shift =~ /\\$([0-9]+),\\s*(%\\w+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x66);\n\tmy $imm=$1;\n\tmy $src=$2;\n\tmy $dst=$3;\n\tif ($src =~ /%r([0-9]+)/)\t{ $src = $1; }\n\telsif ($src =~ /%e/)\t\t{ $src = $regrm{$src}; }\n\trex(\\@opcode,$dst,$src);\n\tpush @opcode,0x0f,0x3a,0x22;\n\tpush @opcode,0xc0|(($dst&7)<<3)|($src&7);\t# ModR/M\n\tpush @opcode,$imm;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $pshufb = sub {\n    if (shift =~ 
/%xmm([0-9]+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x66);\n\trex(\\@opcode,$2,$1);\n\tpush @opcode,0x0f,0x38,0x00;\n\tpush @opcode,0xc0|($1&7)|(($2&7)<<3);\t\t# ModR/M\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $palignr = sub {\n    if (shift =~ /\\$([0-9]+),\\s*%xmm([0-9]+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x66);\n\trex(\\@opcode,$3,$2);\n\tpush @opcode,0x0f,0x3a,0x0f;\n\tpush @opcode,0xc0|($2&7)|(($3&7)<<3);\t\t# ModR/M\n\tpush @opcode,$1;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $pclmulqdq = sub {\n    if (shift =~ /\\$([x0-9a-f]+),\\s*%xmm([0-9]+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x66);\n\trex(\\@opcode,$3,$2);\n\tpush @opcode,0x0f,0x3a,0x44;\n\tpush @opcode,0xc0|($2&7)|(($3&7)<<3);\t\t# ModR/M\n\tmy $c=$1;\n\tpush @opcode,$c=~/^0/?oct($c):$c;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $rdrand = sub {\n    if (shift =~ /%[er](\\w+)/) {\n      my @opcode=();\n      my $dst=$1;\n\tif ($dst !~ /[0-9]+/) { $dst = $regrm{\"%e$dst\"}; }\n\trex(\\@opcode,0,$dst,8);\n\tpush @opcode,0x0f,0xc7,0xf0|($dst&7);\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $rdseed = sub {\n    if (shift =~ /%[er](\\w+)/) {\n      my @opcode=();\n      my $dst=$1;\n\tif ($dst !~ /[0-9]+/) { $dst = $regrm{\"%e$dst\"}; }\n\trex(\\@opcode,0,$dst,8);\n\tpush @opcode,0x0f,0xc7,0xf8|($dst&7);\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\n# Not all AVX-capable assemblers recognize AMD XOP extension. 
Since we\n# are using only two instructions hand-code them in order to be excused\n# from chasing assembler versions...\n\nsub rxb {\n my $opcode=shift;\n my ($dst,$src1,$src2,$rxb)=@_;\n\n   $rxb|=0x7<<5;\n   $rxb&=~(0x04<<5) if($dst>=8);\n   $rxb&=~(0x01<<5) if($src1>=8);\n   $rxb&=~(0x02<<5) if($src2>=8);\n   push @$opcode,$rxb;\n}\n\nmy $vprotd = sub {\n    if (shift =~ /\\$([x0-9a-f]+),\\s*%xmm([0-9]+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x8f);\n\trxb(\\@opcode,$3,$2,-1,0x08);\n\tpush @opcode,0x78,0xc2;\n\tpush @opcode,0xc0|($2&7)|(($3&7)<<3);\t\t# ModR/M\n\tmy $c=$1;\n\tpush @opcode,$c=~/^0/?oct($c):$c;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\nmy $vprotq = sub {\n    if (shift =~ /\\$([x0-9a-f]+),\\s*%xmm([0-9]+),\\s*%xmm([0-9]+)/) {\n      my @opcode=(0x8f);\n\trxb(\\@opcode,$3,$2,-1,0x08);\n\tpush @opcode,0x78,0xc3;\n\tpush @opcode,0xc0|($2&7)|(($3&7)<<3);\t\t# ModR/M\n\tmy $c=$1;\n\tpush @opcode,$c=~/^0/?oct($c):$c;\n\t@opcode;\n    } else {\n\t();\n    }\n};\n\n# Intel Control-flow Enforcement Technology extension. All functions and\n# indirect branch targets will have to start with this instruction...\n# However, it should not be used in functions' prologues explicitly, as\n# it's added automatically [and in the right spot]. 
Which leaves only\n# non-function indirect branch targets, such as in a case-like dispatch\n# table, as application area.\n\nmy $endbr64 = sub {\n    (0xf3,0x0f,0x1e,0xfa);\n};\n\n########################################################################\n\nmy $preproc_prefix = \"#\";\n\nif ($nasm) {\n    $preproc_prefix = \"%\";\n    print <<___;\ndefault\trel\n%define XMMWORD\n%define YMMWORD\n%define ZMMWORD\n___\n} elsif ($masm) {\n    $preproc_prefix = \"\";\n    print <<___;\nOPTION\tDOTNAME\n___\n}\n\nsub process {\n    my $line = shift;\n\n    $line =~ s|\\R$||;\t\t# Better chomp\n\n    if ($line =~ m/^#\\s*(if|elif|else|endif)(.*)/) {\t# pass through preproc\n\tif ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t   && $current_function->{narg} >= 0) {\n\t    print label::win64_args();\n\t}\n\tprint $preproc_prefix,$1,$2,\"\\n\";\n\tnext;\n    }\n\n    if ($line =~ m|#\\s*__SGX_LVI_HARDENING_CLOBBER__=(?:%?(r\\w+))|) {\n\t$ret_clobber = $1;\n    }\n\n    $line =~ s|[#!].*$||;\t# get rid of asm-style comments...\n    $line =~ s|/\\*.*\\*/||;\t# ... and C-style comments...\n    $line =~ s|^\\s+||;\t\t# ... and skip white spaces in beginning\n    $line =~ s|\\s+$||;\t\t# ... 
and at the end\n\n    if (my $label=label->re(\\$line))\t{ print $label->out(); }\n\n    if (my $directive=directive->re(\\$line)) {\n\tprintf \"%s\",$directive->out();\n    } elsif (my $opcode=opcode->re(\\$line)) {\n\tmy $asm = eval(\"\\$\".$opcode->mnemonic());\n\n\tif ((ref($asm) eq 'CODE') && scalar(my @bytes=&$asm($line))) {\n\t    print $gas?\".byte\\t\":\"DB\\t\",join(',',@bytes),\"\\n\";\n\t    next;\n\t}\n\n\tmy @args;\n\tARGUMENT: while (1) {\n\t    my $arg;\n\n\t    ($arg=register->re(\\$line, $opcode))||\n\t    ($arg=const->re(\\$line))\t\t||\n\t    ($arg=ea->re(\\$line, $opcode))\t||\n\t    ($arg=expr->re(\\$line, $opcode))\t||\n\t    last ARGUMENT;\n\n\t    push @args,$arg;\n\n\t    last ARGUMENT if ($line !~ /^,/);\n\n\t    $line =~ s/^,\\s*//;\n\t} # ARGUMENT:\n\n\tif ($win64 && $current_function->{abi} eq \"svr4\"\n\t\t   && $current_function->{narg} >= 0) {\n\t    my $pc = $current_function->{pc};\n\t    my $op = $opcode->{op};\n\t    my $a0 = @args[0]->{value} if ($#args>=0);\n\t    if (!$current_function->{unwind}\n\t\t|| $pc == 0 && !($op eq \"push\" && $a0 eq \"rbp\")\n\t\t|| $pc == 1 && !($op eq \"mov\" && $a0 eq \"rsp\"\n\t\t\t\t\t      && @args[1]->{value} eq \"rbp\"\n\t\t\t\t\t      && ($current_function->{unwind} = \"%rbp\"))\n\t\t|| $pc > 1) {\n\t\tprint label::win64_args();\n\t    }\n\t}\n\n\tif ($#args>=0) {\n\t    my $insn;\n\t    my $sz=$opcode->size();\n\n\t    if ($gas) {\n\t\t$insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);\n\t\t@args = map($_->out($sz),@args);\n\t\tprintf \"\\t%s\\t%s\",$insn,join(\",\",@args);\n\t    } else {\n\t\t$insn = $opcode->out();\n\t\tforeach (@args) {\n\t\t    my $arg = $_->out();\n\t\t    # $insn.=$sz compensates for movq, pinsrw, ...\n\t\t    if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz=\"x\" if(!$sz); last; }\n\t\t    if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz=\"y\" if(!$sz); last; }\n\t\t    if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz=\"z\" if(!$sz); last; }\n\t\t    if ($arg =~ 
/^mm[0-9]+$/)  { $insn.=$sz; $sz=\"q\" if(!$sz); last; }\n\t\t}\n\t\t@args = reverse(@args);\n\t\tundef $sz if ($nasm && $opcode->mnemonic() eq \"lea\");\n\t\tprintf \"\\t%s\\t%s\",$insn,join(\",\",map($_->out($sz),@args));\n\t    }\n\t} else {\n\t    printf \"\\t%s\",$opcode->out();\n\t}\n\n\t++$current_function->{pc} if (defined($current_function));\n    }\n\n    print $line,\"\\n\";\n}\n\nwhile(<>) { process($_); }\n\nmap { process($_) } @pdata_seg if ($win64 && $#pdata_seg>1);\nmap { process($_) } @xdata_seg if ($win64 && $#xdata_seg>1);\n\n# platform-specific epilogue\nif ($masm) {\n    print \"\\n$current_segment\\tENDS\\n\"\tif ($current_segment);\n    print \"END\\n\";\n} elsif ($elf) {\n    # -fcf-protection segment, snatched from compiler -S output\n    my $align = ($flavour =~ /elf32/) ? 4 : 8;\n    print <<___;\n\n.section\t.note.GNU-stack,\"\",\\@progbits\n#ifndef\t__SGX_LVI_HARDENING__\n.section\t.note.gnu.property,\"a\",\\@note\n\t.long\t4,2f-1f,5\n\t.byte\t0x47,0x4E,0x55,0\n1:\t.long\t0xc0000002,4,3\n.align\t$align\n2:\n#endif\n___\n}\n\nclose STDOUT;\n\n\f#################################################\n# Cross-reference x86_64 ABI \"card\"\n#\n# \t\tUnix\t\tWin64\n# %rax\t\t*\t\t*\n# %rbx\t\t-\t\t-\n# %rcx\t\t#4\t\t#1\n# %rdx\t\t#3\t\t#2\n# %rsi\t\t#2\t\t-\n# %rdi\t\t#1\t\t-\n# %rbp\t\t-\t\t-\n# %rsp\t\t-\t\t-\n# %r8\t\t#5\t\t#3\n# %r9\t\t#6\t\t#4\n# %r10\t\t*\t\t*\n# %r11\t\t*\t\t*\n# %r12\t\t-\t\t-\n# %r13\t\t-\t\t-\n# %r14\t\t-\t\t-\n# %r15\t\t-\t\t-\n#\n# (*)\tvolatile register\n# (-)\tpreserved by callee\n# (#)\tNth argument, volatile\n#\n# In Unix terms top of stack is argument transfer area for arguments\n# which could not be accommodated in registers. 
Or in other words 7th\n# [integer] argument resides at 8(%rsp) upon function entry point.\n# 128 bytes above %rsp constitute a \"red zone\" which is not touched\n# by signal handlers and can be used as temporary storage without\n# allocating a frame.\n#\n# In Win64 terms N*8 bytes on top of stack is argument transfer area,\n# which belongs to/can be overwritten by callee. N is the number of\n# arguments passed to callee, *but* not less than 4! This means that\n# upon function entry point 5th argument resides at 40(%rsp), as well\n# as that 32 bytes from 8(%rsp) can always be used as temporary\n# storage [without allocating a frame]. One can actually argue that\n# one can assume a \"red zone\" above stack pointer under Win64 as well.\n# Point is that at apparently no occasion Windows kernel would alter\n# the area above user stack pointer in true asynchronous manner...\n#\n# All the above means that if assembler programmer adheres to Unix\n# register and stack layout, but disregards the \"red zone\" existence,\n# it's possible to use following prologue and epilogue to \"gear\" from\n# Unix to Win64 ABI in leaf functions with not more than 6 arguments.\n#\n# omnipotent_function:\n# ifdef WIN64\n#\tmovq\t%rdi,8(%rsp)\n#\tmovq\t%rsi,16(%rsp)\n#\tmovq\t%rcx,%rdi\t; if 1st argument is actually present\n#\tmovq\t%rdx,%rsi\t; if 2nd argument is actually ...\n#\tmovq\t%r8,%rdx\t; if 3rd argument is ...\n#\tmovq\t%r9,%rcx\t; if 4th argument ...\n#\tmovq\t40(%rsp),%r8\t; if 5th ...\n#\tmovq\t48(%rsp),%r9\t; if 6th ...\n# endif\n#\t...\n# ifdef WIN64\n#\tmovq\t8(%rsp),%rdi\n#\tmovq\t16(%rsp),%rsi\n# endif\n#\tret\n#\n\f#################################################\n# Win64 SEH, Structured Exception Handling.\n#\n# Unlike on Unix systems(*) lack of Win64 stack unwinding information\n# has undesired side-effect at run-time: if an exception is raised in\n# assembler subroutine such as those in question (basically we're\n# referring to segmentation violations caused by malformed 
input\n# parameters), the application is briskly terminated without invoking\n# any exception handlers, most notably without generating memory dump\n# or any user notification whatsoever. This poses a problem. It's\n# possible to address it by registering custom language-specific\n# handler that would restore processor context to the state at\n# subroutine entry point and return \"exception is not handled, keep\n# unwinding\" code. Writing such handler can be a challenge... But it's\n# doable, though requires certain coding convention. Consider following\n# snippet:\n#\n# .type\tfunction,@function\n# function:\n#\tmovq\t%rsp,%rax\t# copy rsp to volatile register\n#\tpushq\t%r15\t\t# save non-volatile registers\n#\tpushq\t%rbx\n#\tpushq\t%rbp\n#\tmovq\t%rsp,%r11\n#\tsubq\t%rdi,%r11\t# prepare [variable] stack frame\n#\tandq\t$-64,%r11\n#\tmovq\t%rax,0(%r11)\t# check for exceptions\n#\tmovq\t%r11,%rsp\t# allocate [variable] stack frame\n#\tmovq\t%rax,0(%rsp)\t# save original rsp value\n# magic_point:\n#\t...\n#\tmovq\t0(%rsp),%rcx\t# pull original rsp value\n#\tmovq\t-24(%rcx),%rbp\t# restore non-volatile registers\n#\tmovq\t-16(%rcx),%rbx\n#\tmovq\t-8(%rcx),%r15\n#\tmovq\t%rcx,%rsp\t# restore original rsp\n# magic_epilogue:\n#\tret\n# .size function,.-function\n#\n# The key is that up to magic_point copy of original rsp value remains\n# in chosen volatile register and no non-volatile register, except for\n# rsp, is modified. While past magic_point rsp remains constant till\n# the very end of the function. 
In this case custom language-specific\n# exception handler would look like this:\n#\n# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,\n#\t\tCONTEXT *context,DISPATCHER_CONTEXT *disp)\n# {\tULONG64 *rsp = (ULONG64 *)context->Rax;\n#\tULONG64  rip = context->Rip;\n#\n#\tif (rip >= magic_point)\n#\t{   rsp = (ULONG64 *)context->Rsp;\n#\t    if (rip < magic_epilogue)\n#\t    {\trsp = (ULONG64 *)rsp[0];\n#\t\tcontext->Rbp = rsp[-3];\n#\t\tcontext->Rbx = rsp[-2];\n#\t\tcontext->R15 = rsp[-1];\n#\t    }\n#\t}\n#\tcontext->Rsp = (ULONG64)rsp;\n#\tcontext->Rdi = rsp[1];\n#\tcontext->Rsi = rsp[2];\n#\n#\tmemcpy (disp->ContextRecord,context,sizeof(CONTEXT));\n#\tRtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase,\n#\t\tdisp->ControlPc,disp->FunctionEntry,disp->ContextRecord,\n#\t\t&disp->HandlerData,&disp->EstablisherFrame,NULL);\n#\treturn ExceptionContinueSearch;\n# }\n#\n# It's appropriate to implement this handler in assembler, directly in\n# function's module. In order to do that one has to know members'\n# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant\n# values. 
Here they are:\n#\n#\tCONTEXT.Rax\t\t\t\t120\n#\tCONTEXT.Rcx\t\t\t\t128\n#\tCONTEXT.Rdx\t\t\t\t136\n#\tCONTEXT.Rbx\t\t\t\t144\n#\tCONTEXT.Rsp\t\t\t\t152\n#\tCONTEXT.Rbp\t\t\t\t160\n#\tCONTEXT.Rsi\t\t\t\t168\n#\tCONTEXT.Rdi\t\t\t\t176\n#\tCONTEXT.R8\t\t\t\t184\n#\tCONTEXT.R9\t\t\t\t192\n#\tCONTEXT.R10\t\t\t\t200\n#\tCONTEXT.R11\t\t\t\t208\n#\tCONTEXT.R12\t\t\t\t216\n#\tCONTEXT.R13\t\t\t\t224\n#\tCONTEXT.R14\t\t\t\t232\n#\tCONTEXT.R15\t\t\t\t240\n#\tCONTEXT.Rip\t\t\t\t248\n#\tCONTEXT.Xmm6\t\t\t\t512\n#\tsizeof(CONTEXT)\t\t\t\t1232\n#\tDISPATCHER_CONTEXT.ControlPc\t\t0\n#\tDISPATCHER_CONTEXT.ImageBase\t\t8\n#\tDISPATCHER_CONTEXT.FunctionEntry\t16\n#\tDISPATCHER_CONTEXT.EstablisherFrame\t24\n#\tDISPATCHER_CONTEXT.TargetIp\t\t32\n#\tDISPATCHER_CONTEXT.ContextRecord\t40\n#\tDISPATCHER_CONTEXT.LanguageHandler\t48\n#\tDISPATCHER_CONTEXT.HandlerData\t\t56\n#\tUNW_FLAG_NHANDLER\t\t\t0\n#\tExceptionContinueSearch\t\t\t1\n#\n# In order to tie the handler to the function one has to compose\n# couple of structures: one for .xdata segment and one for .pdata.\n#\n# UNWIND_INFO structure for .xdata segment would be\n#\n# function_unwind_info:\n#\t.byte\t9,0,0,0\n#\t.rva\thandler\n#\n# This structure designates exception handler for a function with\n# zero-length prologue, no stack frame or frame register.\n#\n# To facilitate composing of .pdata structures, auto-generated \"gear\"\n# prologue copies rsp value to rax and denotes next instruction with\n# .LSEH_begin_{function_name} label. This essentially defines the SEH\n# styling rule mentioned in the beginning. Position of this label is\n# chosen in such manner that possible exceptions raised in the \"gear\"\n# prologue would be accounted to caller and unwound from latter's frame.\n# End of function is marked with respective .LSEH_end_{function_name}\n# label. 
To summarize, .pdata segment would contain\n#\n#\t.rva\t.LSEH_begin_function\n#\t.rva\t.LSEH_end_function\n#\t.rva\tfunction_unwind_info\n#\n# Reference to function_unwind_info from .xdata segment is the anchor.\n# In case you wonder why references are 32-bit .rvas and not 64-bit\n# .quads. References put into these two segments are required to be\n# *relative* to the base address of the current binary module, a.k.a.\n# image base. No Win64 module, be it .exe or .dll, can be larger than\n# 2GB and thus such relative references can be and are accommodated in\n# 32 bits.\n#\n# Having reviewed the example function code, one can argue that \"movq\n# %rsp,%rax\" above is redundant. It is not! Keep in mind that on Unix\n# rax would contain an undefined value. If this \"offends\" you, use\n# another register and refrain from modifying rax till magic_point is\n# reached, i.e. as if it was a non-volatile register. If more registers\n# are required prior [variable] frame setup is completed, note that\n# nobody says that you can have only one \"magic point.\" You can\n# \"liberate\" non-volatile registers by denoting last stack off-load\n# instruction and reflecting it in finer grade unwind logic in handler.\n# After all, isn't it why it's called *language-specific* handler...\n#\n# SE handlers are also involved in unwinding stack when executable is\n# profiled or debugged. Profiling implies additional limitations that\n# are too subtle to discuss here. For now it's sufficient to say that\n# in order to simplify handlers one should either a) offload original\n# %rsp to stack (like discussed above); or b) if you have a register to\n# spare for frame pointer, choose volatile one.\n#\n# (*)\tNote that we're talking about run-time, not debug-time. Lack of\n#\tunwind information makes debugging hard on both Windows and\n#\tUnix. 
\"Unlike\" refers to the fact that on Unix signal handler\n#\twill always be invoked, core dumped and appropriate exit code\n#\treturned to parent (for user notification).\n#\n########################################################################\n# As of May 2020 an alternative approach that works with both exceptions\n# and debugging/profiling was implemented by re-purposing DWARF .cfi\n# annotations even for Win64 unwind tables' generation. Unfortunately,\n# but not really unexpectedly, it imposes additional limitations on\n# coding style. Probably the most significant limitation is that the\n# frame pointer has to be at 16*n distance from the stack pointer at the\n# exit from prologue. But first things first. There are two additional\n# synthetic .cfi directives, .cfi_end_prologue and .cfi_epilogue,\n# that need to be added to all functions marked with additional .type\n# tag (see example below). There are \"do's and don'ts\" for prologue\n# and epilogue. It shouldn't come as a surprise that in prologue one may\n# not modify non-volatile registers, but one may not modify %r11 either.\n# This is because it's used as a temporary frame pointer(*). There are\n# two exceptions to this rule. 1) One can set up a non-volatile register\n# or %r11 as a frame pointer, but it must be last instruction in the\n# prologue. 2) One can use 'push %rbp' as first instruction immediately\n# followed by 'mov %rsp,%rbp' to use %rbp as \"legacy\" frame pointer.\n# Constraints for epilogue, or rather on its boundary, depend on whether\n# the frame is fixed- or variable-length. In fixed-frame subroutine\n# stack pointer has to be restored in the last instruction prior to the\n# .cfi_epilogue directive. If it's a variable-frame subroutine, and a\n# non-volatile register was used as a frame pointer, then the last\n# instruction prior to the directive has to restore its original value.\n# This means that final stack pointer adjustment would have to be\n# pushed past the directive. 
Normally this would render the epilogue\n# non-unwindable, so special care has to be taken. To resolve the\n# dilemma, copy the frame pointer to a volatile register in advance.\n# To give an example:\n#\n# .type\trbp_as_frame_pointer,\\@function,3,\"unwind\"  # mind extra tag!\n# rbp_as_frame_pointer:\n# .cfi_startproc\n#\tpush\t%rbp\n# .cfi_push\t%rbp\n#\tpush\t%rbx\n# .cfi_push\t%rbx\n# \tmov\t%rsp,%rbp\t# last instruction in prologue\n# .cfi_def_cfa_register\t%rbp\t# %rsp-%rbp has to be 16*n, e.g. 16*0\n# .cfi_end_prologue\n#\tsub\t\\$40,%rsp\n#\tand\t\\$-64,%rsp\n#\t...\n#\tmov\t%rbp,%r11\n# .cfi_def_cfa_register\t%r11\t# copy frame pointer to volatile %r11\n#\tmov\t0(%rbp),%rbx\n#\tmov\t8(%rbp),%rbp\t# last instruction prior epilogue\n# .cfi_epilogue\t\t\t# may not change %r11 in epilogue\n#\tlea\t16(%r11),%rsp\n#\tret\n# .cfi_endproc\n# .size\trbp_as_frame_pointer,.-rbp_as_frame_pointer\n#\n# An example of \"legacy\" frame pointer:\n#\n# .type\tlegacy_frame_pointer,\\@function,3,\"unwind\"  # mind extra tag!\n# legacy_frame_pointer:\n# .cfi_startproc\n#\tpush\t%rbp\n# .cfi_push\t%rbp\n# \tmov\t%rsp,%rbp\n# .cfi_def_cfa_register\t%rbp\n#\tpush\t%rbx\n# .cfi_push\t%rbx\n#\tsub\t\\$40,%rsp\n# .cfi_alloca\t40\n# .cfi_end_prologue\t\t# %rsp-%rbp has to be 16*n\n#\tand\t\\$-64,%rsp\n#\t...\n#\tmov\t-8(%rbp),%rbx\n#\tmov\t%rbp,%rsp\n# .cfi_def_cfa_register\t%rsp\n#\tpop\t%rbp\t\t# recognized by Windows\n# .cfi_pop\t%rbp\n# .cfi_epilogue\n#\tret\n# .cfi_endproc\n# .size\tlegacy_frame_pointer,.-legacy_frame_pointer\n#\n# To give an example of fixed-frame subroutine for reference:\n#\n# .type\tfixed_frame,\\@function,3,\"unwind\"           # mind extra tag!\n# fixed_frame:\n# .cfi_startproc\n#\tpush\t%rbp\n# .cfi_push\t%rbp\n#\tpush\t%rbx\n# .cfi_push\t%rbx\n#\tsub\t\\$40,%rsp\n# .cfi_adjust_cfa_offset 40\n# .cfi_end_prologue\n#\t...\n#\tmov\t40(%rsp),%rbx\n#\tmov\t48(%rsp),%rbp\n#\tlea\t56(%rsp),%rsp\n# .cfi_adjust_cfa_offset -56\n# .cfi_epilogue\n#\tret\n# 
.cfi_endproc\n# .size\tfixed_frame,.-fixed_frame\n#\n# As for epilogue itself, one can only work on non-volatile registers.\n# \"Non-volatile\" in \"Windows\" sense, i.e. minus %rdi and %rsi.\n#\n# On a final note, mixing old-style and modernized subroutines in the\n# same file takes some trickery. Ones of the new kind have to appear\n# after old-style ones. This has everything to do with the fact that\n# entries in the .pdata segment have to appear in strictly same order\n# as corresponding subroutines, and auto-generated RUNTIME_FUNCTION\n# structures get mechanically appended to whatever existing .pdata.\n#\n# (*)\tJust in case, why %r11 and not %rax. This has everything to do\n#\twith the way UNWIND_INFO is, one just can't designate %rax as\n#\tframe pointer.\n"
  },
  {
    "path": "src/blst_t.hpp",
    "content": "// Copyright Supranational LLC\n// Licensed under the Apache License, Version 2.0, see LICENSE for details.\n// SPDX-License-Identifier: Apache-2.0\n\n#ifndef __BLST_T_HPP__\n#define __BLST_T_HPP__\n\n/*\n * These templates, blst_384_t and blst_256_t, allow to instantiate slim\n * C++ shims to blst assembly with arbitrary moduli. Well, not literally\n * arbitrary, as there is a limitation, specifically 256-bit modulus has\n * to be not larger than 2^256-2^192-1.\n */\n\n#ifdef __GNUC__\n# pragma GCC diagnostic push\n# pragma GCC diagnostic ignored \"-Wunused-function\"\n#endif\n\nextern \"C\" {\n#include \"vect.h\"\n}\n#include \"bytes.h\"\n\n#undef launder // avoid conflict with C++ >=17\n\n#ifdef __GNUC__\n# pragma GCC diagnostic pop\n#endif\n\n#include <cstdint>\n\nstatic inline void vec_left_align(limb_t *out, const limb_t *inp, size_t N)\n{\n    const unsigned int nbits = sizeof(inp[0])*8;\n    const unsigned int align = (0 - N) % nbits;\n    size_t n = (N + nbits - 1) / nbits;\n\n    if (align) {\n        limb_t top = inp[n-1] << align;\n\n        while (--n) {\n            limb_t next = inp[n-1];\n            out[n] = top | next >> (nbits-align);\n            top = next << align;\n        }\n        out[0] = top;\n    } else {\n        for (size_t i = 0; i < n; i++)\n            out[i] = inp[i];\n    }\n}\n\ntemplate<const size_t N, const vec384 MOD, const limb_t M0,\n                         const vec384 RR, const vec384 ONE>\nclass blst_384_t {\nprivate:\n    vec384 val;\n\n    inline operator const limb_t*() const           { return val;    }\n    inline operator limb_t*()                       { return val;    }\n    inline limb_t& operator[](size_t i)             { return val[i]; }\n    inline const limb_t& operator[](size_t i) const { return val[i]; }\n\n    static const size_t n = sizeof(vec384)/sizeof(limb_t);\npublic:\n    static const size_t nbits = N;\n    static constexpr size_t bit_length() { return N; }\n    static const 
unsigned int degree = 1;\n    typedef byte pow_t[384/8];\n    typedef blst_384_t mem_t;\n\n    inline blst_384_t() {}\n    inline blst_384_t(const vec384 p, bool align = false)\n    {\n        if (align)\n            vec_left_align(val, p, N);\n        else\n            vec_copy(val, p, sizeof(val));\n    }\n    inline blst_384_t(uint64_t a)\n    {\n        vec_zero(val, sizeof(val));\n        val[0] = a;\n        if (a) to();\n    }\n    inline blst_384_t(int a) : blst_384_t((uint64_t)a) {}\n\n#if defined(__CUDACC__) || defined(__HIPCC__)\n# if __cplusplus < 201402L && _MSVC_LANG-0 < 201402L\n#  error \"C++ >= 14 is required to compile <blst>/src/blst_t.hpp for CUDA\"\n# endif\n    template<typename... Ts>\n    constexpr blst_384_t(limb_t a0, Ts... arr)\n    {\n        limb_t temp[11] = {arr...};\n\n        if (sizeof...(arr) < 6) {\n            val[0] = a0;\n            val[1] = temp[0];\n            val[2] = temp[1];\n            val[3] = temp[2];\n            val[4] = temp[3];\n            val[5] = temp[4];\n        } else {\n            val[0] = a0      | (temp[0] << 32);\n            val[1] = temp[1] | (temp[2] << 32);\n            val[2] = temp[3] | (temp[4] << 32);\n            val[3] = temp[5] | (temp[6] << 32);\n            val[4] = temp[7] | (temp[8] << 32);\n            val[5] = temp[9] | (temp[10] << 32);\n        }\n    }\n#else\n    template<typename... Ts>\n    constexpr blst_384_t(limb_t a0, Ts... 
arr) : val{a0, arr...} {}\n#endif\n\n    inline void to_scalar(pow_t& scalar) const\n    {\n        const union {\n            long one;\n            char little;\n        } is_endian = { 1 };\n\n        if ((size_t)scalar%sizeof(limb_t) == 0 && is_endian.little) {\n            from_mont_384((limb_t *)scalar, val, MOD, M0);\n        } else {\n            vec384 out;\n            from_mont_384(out, val, MOD, M0);\n            le_bytes_from_limbs(scalar, out, sizeof(pow_t));\n            vec_zero(out, sizeof(out));\n        }\n    }\n\n    static inline const blst_384_t& one()\n    {   return *reinterpret_cast<const blst_384_t*>(ONE);   }\n\n    static inline blst_384_t one(bool or_zero)\n    {\n        blst_384_t ret;\n        limb_t mask = ~((limb_t)0 - or_zero);\n        for (size_t i = 0; i < n; i++)\n            ret[i] = ONE[i] & mask;\n        return ret;\n    }\n\n    inline blst_384_t& to()\n    {   mul_mont_384(val, RR, val, MOD, M0);        return *this;   }\n    inline blst_384_t& from()\n    {   from_mont_384(val, val, MOD, M0);           return *this;   }\n\n    inline void store(limb_t *p) const\n    {   vec_copy(p, val, sizeof(val));   }\n\n    inline blst_384_t& operator+=(const blst_384_t& b)\n    {   add_mod_384(val, val, b, MOD);              return *this;   }\n    friend inline blst_384_t operator+(const blst_384_t& a, const blst_384_t& b)\n    {\n        blst_384_t ret;\n        add_mod_384(ret, a, b, MOD);\n        return ret;\n    }\n\n    inline blst_384_t& operator<<=(unsigned l)\n    {   lshift_mod_384(val, val, l, MOD);           return *this;   }\n    friend inline blst_384_t operator<<(const blst_384_t& a, unsigned l)\n    {\n        blst_384_t ret;\n        lshift_mod_384(ret, a, l, MOD);\n        return ret;\n    }\n\n    inline blst_384_t& operator>>=(unsigned r)\n    {   rshift_mod_384(val, val, r, MOD);           return *this;   }\n    friend inline blst_384_t operator>>(const blst_384_t& a, unsigned r)\n    {\n        blst_384_t 
ret;\n        rshift_mod_384(ret, a, r, MOD);\n        return ret;\n    }\n\n    inline blst_384_t& operator-=(const blst_384_t& b)\n    {   sub_mod_384(val, val, b, MOD);              return *this;   }\n    friend inline blst_384_t operator-(const blst_384_t& a, const blst_384_t& b)\n    {\n        blst_384_t ret;\n        sub_mod_384(ret, a, b, MOD);\n        return ret;\n    }\n\n    inline blst_384_t& cneg(bool flag)\n    {   cneg_mod_384(val, val, flag, MOD);          return *this;   }\n    friend inline blst_384_t cneg(const blst_384_t& a, bool flag)\n    {\n        blst_384_t ret;\n        cneg_mod_384(ret, a, flag, MOD);\n        return ret;\n    }\n    friend inline blst_384_t operator-(const blst_384_t& a)\n    {\n        blst_384_t ret;\n        cneg_mod_384(ret, a, true, MOD);\n        return ret;\n    }\n\n    inline blst_384_t& operator*=(const blst_384_t& a)\n    {\n        if (this == &a) sqr_mont_384(val, val, MOD, M0);\n        else            mul_mont_384(val, val, a, MOD, M0);\n        return *this;\n    }\n    friend inline blst_384_t operator*(const blst_384_t& a, const blst_384_t& b)\n    {\n        blst_384_t ret;\n        if (&a == &b)   sqr_mont_384(ret, a, MOD, M0);\n        else            mul_mont_384(ret, a, b, MOD, M0);\n        return ret;\n    }\n\n    // simplified exponentiation, but mind the ^ operator's precedence!\n    friend inline blst_384_t operator^(const blst_384_t& a, unsigned p)\n    {\n        if (p < 2) {\n            abort();\n        } else if (p == 2) {\n            blst_384_t ret;\n            sqr_mont_384(ret, a, MOD, M0);\n            return ret;\n        } else {\n            blst_384_t ret = a, sqr = a;\n            if ((p&1) == 0) {\n                do {\n                    sqr_mont_384(sqr, sqr, MOD, M0);\n                    p >>= 1;\n                } while ((p&1) == 0);\n                ret = sqr;\n            }\n            for (p >>= 1; p; p >>= 1) {\n                sqr_mont_384(sqr, sqr, MOD, M0);\n   
             if (p&1)\n                    mul_mont_384(ret, ret, sqr, MOD, M0);\n            }\n            return ret;\n        }\n    }\n    inline blst_384_t& operator^=(unsigned p)\n    {\n        if (p < 2) {\n            abort();\n        } else if (p == 2) {\n            sqr_mont_384(val, val, MOD, M0);\n            return *this;\n        }\n        return *this = *this^p;\n    }\n    inline blst_384_t operator()(unsigned p)\n    {   return *this^p;   }\n    friend inline blst_384_t sqr(const blst_384_t& a)\n    {   return a^2;   }\n\n    inline bool is_one() const\n    {   return vec_is_equal(val, ONE, sizeof(val));   }\n\n    inline int is_zero() const\n    {   return vec_is_zero(val, sizeof(val));   }\n\n    inline void zero()\n    {   vec_zero(val, sizeof(val));   }\n\n    friend inline blst_384_t czero(const blst_384_t& a, int set_z)\n    {   blst_384_t ret;\n        const vec384 zero = { 0 };\n        vec_select(ret, zero, a, sizeof(ret), set_z);\n        return ret;\n    }\n\n    static inline blst_384_t csel(const blst_384_t& a, const blst_384_t& b,\n                                  int sel_a)\n    {   blst_384_t ret;\n        vec_select(ret, a, b, sizeof(ret), sel_a);\n        return ret;\n    }\n\n    blst_384_t reciprocal() const\n    {\n        static const blst_384_t MODx{MOD, true};\n        union { vec768 x; vec384 r[2]; } temp;\n\n        ct_inverse_mod_384(temp.x, val, MOD, MODx);\n        redc_mont_384(temp.r[0], temp.x, MOD, M0);\n        mul_mont_384(temp.r[0], temp.r[0], RR, MOD, M0);\n\n        return *reinterpret_cast<blst_384_t*>(temp.r[0]);\n    }\n    friend inline blst_384_t operator/(unsigned one, const blst_384_t& a)\n    {\n        if (one == 1)\n            return a.reciprocal();\n        abort();\n    }\n    friend inline blst_384_t operator/(const blst_384_t& a, const blst_384_t& b)\n    {   return a * b.reciprocal();   }\n    inline blst_384_t& operator/=(const blst_384_t& a)\n    {   return *this *= a.reciprocal();   
}\n\n    inline blst_384_t(const char *hexascii)\n    {   limbs_from_hexascii(val, sizeof(val), hexascii); to();   }\n\n    friend inline bool operator==(const blst_384_t& a, const blst_384_t& b)\n    {   return vec_is_equal(a, b, sizeof(vec384));   }\n    friend inline bool operator!=(const blst_384_t& a, const blst_384_t& b)\n    {   return !vec_is_equal(a, b, sizeof(vec384));   }\n\n    template<class OStream, typename Traits = typename OStream::traits_type>\n    friend OStream& operator<<(OStream& os, const blst_384_t& obj)\n    {\n        unsigned char be[sizeof(obj)];\n        char buf[2+2*sizeof(obj)+1], *str = buf;\n\n        be_bytes_from_limbs(be, blst_384_t{obj}.from(), sizeof(obj));\n\n        *str++ = '0', *str++ = 'x';\n        for (size_t i = 0; i < sizeof(obj); i++)\n            *str++ = hex_from_nibble(be[i]>>4), *str++ = hex_from_nibble(be[i]);\n        *str = '\\0';\n\n        return os << buf;\n    }\n};\n\ntemplate<const size_t N, const vec256 MOD, const limb_t M0,\n                         const vec256 RR, const vec256 ONE>\nclass blst_256_t {\n    vec256 val;\n\n    inline operator const limb_t*() const           { return val;    }\n    inline operator limb_t*()                       { return val;    }\n    inline limb_t& operator[](size_t i)             { return val[i]; }\n    inline const limb_t& operator[](size_t i) const { return val[i]; }\n\n    static const size_t n = sizeof(vec256)/sizeof(limb_t);\npublic:\n    static const size_t nbits = N;\n    static constexpr size_t bit_length() { return N; }\n    static const unsigned int degree = 1;\n    typedef byte pow_t[256/8];\n    typedef blst_256_t mem_t;\n\n    inline blst_256_t() {}\n    inline blst_256_t(const vec256 p, bool align = false)\n    {\n        if (align)\n            vec_left_align(val, p, N);\n        else\n            vec_copy(val, p, sizeof(val));\n    }\n    inline blst_256_t(uint64_t a)\n    {\n        vec_zero(val, sizeof(val));\n        val[0] = a;\n        if (a) 
to();\n    }\n    inline blst_256_t(int a) : blst_256_t((uint64_t)a) {}\n\n#if defined(__CUDACC__) || defined(__HIPCC__)\n# if __cplusplus < 201402L && _MSVC_LANG-0 < 201402L\n#  error \"C++ >= 14 is required to compile <blst>/src/blst_t.hpp for CUDA\"\n# endif\n    template<typename... Ts>\n    constexpr blst_256_t(limb_t a0, Ts... arr)\n    {\n        limb_t temp[7] = {arr...};\n\n        if (sizeof...(arr) < 4) {\n            val[0] = a0;\n            val[1] = temp[0];\n            val[2] = temp[1];\n            val[3] = temp[2];\n        } else {\n            val[0] = a0      | (temp[0] << 32);\n            val[1] = temp[1] | (temp[2] << 32);\n            val[2] = temp[3] | (temp[4] << 32);\n            val[3] = temp[5] | (temp[6] << 32);\n        }\n    }\n#else\n    template<typename... Ts>\n    constexpr blst_256_t(limb_t a0, Ts... arr) : val{a0, arr...} {}\n#endif\n\n    inline void to_scalar(pow_t& scalar) const\n    {\n        const union {\n            long one;\n            char little;\n        } is_endian = { 1 };\n\n        if ((size_t)scalar%sizeof(limb_t) == 0 && is_endian.little) {\n            from_mont_256((limb_t *)scalar, val, MOD, M0);\n        } else {\n            vec256 out;\n            from_mont_256(out, val, MOD, M0);\n            le_bytes_from_limbs(scalar, out, sizeof(pow_t));\n            vec_zero(out, sizeof(out));\n        }\n    }\n\n    static inline const blst_256_t& one()\n    {   return *reinterpret_cast<const blst_256_t*>(ONE);   }\n\n    static inline blst_256_t one(bool or_zero)\n    {\n        blst_256_t ret;\n        limb_t mask = ~((limb_t)0 - or_zero);\n        for (size_t i = 0; i < n; i++)\n            ret[i] = ONE[i] & mask;\n        return ret;\n    }\n\n    inline blst_256_t& to()\n    {   mul_mont_sparse_256(val, val, RR, MOD, M0); return *this;   }\n    inline blst_256_t& to(const uint64_t a[2*n])\n    {\n        mul_mont_sparse_256(val, RR, (const limb_t*)(a + n), MOD, M0);\n        add_mod_256(val, val, (const 
limb_t*)a, MOD);\n        mul_mont_sparse_256(val, RR, val, MOD, M0);\n\n        return *this;\n    }\n    blst_256_t& to(const unsigned char* bytes, size_t n, bool le = false)\n    {\n        vec_zero(val, sizeof(val));\n\n        vec256 digit;\n        size_t rem = (n - 1) % 32 + 1;\n        n -= rem;\n\n        if (le) {\n            limbs_from_le_bytes(val, bytes += n, rem);\n            mul_mont_sparse_256(val, RR, val, MOD, M0);\n            while (n) {\n                limbs_from_le_bytes(digit, bytes -= 32, 32);\n                add_mod_256(val, val, digit, MOD);\n                mul_mont_sparse_256(val, RR, val, MOD, M0);\n                n -= 32;\n            }\n        } else {\n            limbs_from_be_bytes(val, bytes, rem);\n            mul_mont_sparse_256(val, RR, val, MOD, M0);\n            bytes += rem;\n            while (n) {\n                limbs_from_be_bytes(digit, bytes, 32);\n                add_mod_256(val, val, digit, MOD);\n                mul_mont_sparse_256(val, RR, val, MOD, M0);\n                bytes += 32;\n                n -= 32;\n            }\n        }\n\n        return *this;\n    }\n\n    inline blst_256_t& from()\n    {   from_mont_256(val, val, MOD, M0); return *this;   }\n    inline blst_256_t& from(const uint64_t a[2*n])\n    {\n        redc_mont_256(val, (const limb_t*)a, MOD, M0);\n        mul_mont_sparse_256(val, RR, val, MOD, M0);\n\n        return *this;\n    }\n    inline blst_256_t& from(const unsigned char *bytes, size_t n, bool le = false)\n    {\n        if (n > 64)\n            return to(bytes, n, le).from();\n\n        if (n > 32) {\n            vec512 temp{0};\n            if (le) limbs_from_le_bytes(temp, bytes, n);\n            else    limbs_from_be_bytes(temp, bytes, n);\n            redc_mont_256(val, temp, MOD, M0);\n            mul_mont_sparse_256(val, RR, val, MOD, M0);\n        } else {\n            vec_zero(val, sizeof(val));\n            if (le) limbs_from_le_bytes(val, bytes, n);\n            
else    limbs_from_be_bytes(val, bytes, n);\n            mul_mont_sparse_256(val, ONE, val, MOD, M0);\n        }\n\n        return *this;\n    }\n\n    inline void store(limb_t *p) const\n    {   vec_copy(p, val, sizeof(val));   }\n\n    inline blst_256_t& operator+=(const blst_256_t& b)\n    {   add_mod_256(val, val, b, MOD);              return *this;   }\n    friend inline blst_256_t operator+(const blst_256_t& a, const blst_256_t& b)\n    {\n        blst_256_t ret;\n        add_mod_256(ret, a, b, MOD);\n        return ret;\n    }\n\n    inline blst_256_t& operator<<=(unsigned l)\n    {   lshift_mod_256(val, val, l, MOD);           return *this;   }\n    friend inline blst_256_t operator<<(const blst_256_t& a, unsigned l)\n    {\n        blst_256_t ret;\n        lshift_mod_256(ret, a, l, MOD);\n        return ret;\n    }\n\n    inline blst_256_t& operator>>=(unsigned r)\n    {   rshift_mod_256(val, val, r, MOD);           return *this;   }\n    friend inline blst_256_t operator>>(const blst_256_t& a, unsigned r)\n    {\n        blst_256_t ret;\n        rshift_mod_256(ret, a, r, MOD);\n        return ret;\n    }\n\n    inline blst_256_t& operator-=(const blst_256_t& b)\n    {   sub_mod_256(val, val, b, MOD);              return *this;   }\n    friend inline blst_256_t operator-(const blst_256_t& a, const blst_256_t& b)\n    {\n        blst_256_t ret;\n        sub_mod_256(ret, a, b, MOD);\n        return ret;\n    }\n\n    inline blst_256_t& cneg(bool flag)\n    {   cneg_mod_256(val, val, flag, MOD);          return *this;   }\n    friend inline blst_256_t cneg(const blst_256_t& a, bool flag)\n    {\n        blst_256_t ret;\n        cneg_mod_256(ret, a, flag, MOD);\n        return ret;\n    }\n    friend inline blst_256_t operator-(const blst_256_t& a)\n    {\n        blst_256_t ret;\n        cneg_mod_256(ret, a, true, MOD);\n        return ret;\n    }\n\n    inline blst_256_t& operator*=(const blst_256_t& a)\n    {\n        if (this == &a) 
sqr_mont_sparse_256(val, val, MOD, M0);\n        else            mul_mont_sparse_256(val, val, a, MOD, M0);\n        return *this;\n    }\n    friend inline blst_256_t operator*(const blst_256_t& a, const blst_256_t& b)\n    {\n        blst_256_t ret;\n        if (&a == &b)   sqr_mont_sparse_256(ret, a, MOD, M0);\n        else            mul_mont_sparse_256(ret, a, b, MOD, M0);\n        return ret;\n    }\n\n    // simplified exponentiation, but mind the ^ operator's precedence!\n    friend inline blst_256_t operator^(const blst_256_t& a, unsigned p)\n    {\n        if (p < 2) {\n            abort();\n        } else if (p == 2) {\n            blst_256_t ret;\n            sqr_mont_sparse_256(ret, a, MOD, M0);\n            return ret;\n        } else {\n            blst_256_t ret = a, sqr = a;\n            if ((p&1) == 0) {\n                do {\n                    sqr_mont_sparse_256(sqr, sqr, MOD, M0);\n                    p >>= 1;\n                } while ((p&1) == 0);\n                ret = sqr;\n            }\n            for (p >>= 1; p; p >>= 1) {\n                sqr_mont_sparse_256(sqr, sqr, MOD, M0);\n                if (p&1)\n                    mul_mont_sparse_256(ret, ret, sqr, MOD, M0);\n            }\n            return ret;\n        }\n    }\n    inline blst_256_t& operator^=(unsigned p)\n    {\n        if (p < 2) {\n            abort();\n        } else if (p == 2) {\n            sqr_mont_sparse_256(val, val, MOD, M0);\n            return *this;\n        }\n        return *this = *this^p;\n    }\n    inline blst_256_t operator()(unsigned p)\n    {   return *this^p;   }\n    friend inline blst_256_t sqr(const blst_256_t& a)\n    {   return a^2;   }\n\n    inline bool is_one() const\n    {   return vec_is_equal(val, ONE, sizeof(val));   }\n\n    inline int is_zero() const\n    {   return vec_is_zero(val, sizeof(val));   }\n\n    inline void zero()\n    {   vec_zero(val, sizeof(val));   }\n\n    friend inline blst_256_t czero(const blst_256_t& a, int 
set_z)\n    {   blst_256_t ret;\n        const vec256 zero = { 0 };\n        vec_select(ret, zero, a, sizeof(ret), set_z);\n        return ret;\n    }\n\n    static inline blst_256_t csel(const blst_256_t& a, const blst_256_t& b,\n                                  int sel_a)\n    {   blst_256_t ret;\n        vec_select(ret, a, b, sizeof(ret), sel_a);\n        return ret;\n    }\n\n    blst_256_t reciprocal() const\n    {\n        static const blst_256_t MODx{MOD, true};\n        union { vec512 x; vec256 r[2]; } temp;\n\n        ct_inverse_mod_256(temp.x, val, MOD, MODx);\n        redc_mont_256(temp.r[0], temp.x, MOD, M0);\n        mul_mont_sparse_256(temp.r[0], temp.r[0], RR, MOD, M0);\n\n        return *reinterpret_cast<blst_256_t*>(temp.r[0]);\n    }\n    friend inline blst_256_t operator/(int one, const blst_256_t& a)\n    {\n        if (one == 1)\n            return a.reciprocal();\n        abort();\n    }\n    friend inline blst_256_t operator/(const blst_256_t& a, const blst_256_t& b)\n    {   return a * b.reciprocal();   }\n    inline blst_256_t& operator/=(const blst_256_t& a)\n    {   return *this *= a.reciprocal();   }\n\n    inline blst_256_t(const char *hexascii)\n    {   limbs_from_hexascii(val, sizeof(val), hexascii); to();   }\n\n    friend inline bool operator==(const blst_256_t& a, const blst_256_t& b)\n    {   return vec_is_equal(a, b, sizeof(vec256));   }\n    friend inline bool operator!=(const blst_256_t& a, const blst_256_t& b)\n    {   return !vec_is_equal(a, b, sizeof(vec256));   }\n\n    template<class OStream, typename Traits = typename OStream::traits_type>\n    friend OStream& operator<<(OStream& os, const blst_256_t& obj)\n    {\n        unsigned char be[sizeof(obj)];\n        char buf[2+2*sizeof(obj)+1], *str=buf;\n\n        be_bytes_from_limbs(be, blst_256_t{obj}.from(), sizeof(obj));\n\n        *str++ = '0', *str++ = 'x';\n        for (size_t i = 0; i < sizeof(obj); i++)\n            *str++ = hex_from_nibble(be[i]>>4), *str++ = 
hex_from_nibble(be[i]);\n        *str = '\\0';\n\n        return os << buf;\n    }\n};\n#endif\n"
  },
  {
    "path": "src/bulk_addition.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n#include \"point.h\"\n\n/*\n * This implementation uses explicit addition formula:\n *\n * λ = (Y₂-Y₁)/(X₂-X₁)\n * X₃ = λ²-(X₁+X₂)\n * Y₃ = λ⋅(X₁-X₃)-Y₁\n *\n * But since we don't know if we'll have to add point to itself, we need\n * to eventually resort to corresponding doubling formula:\n *\n * λ = 3X₁²/2Y₁\n * X₃ = λ²-2X₁\n * Y₃ = λ⋅(X₁-X₃)-Y₁\n *\n * The formulae use prohibitively expensive inversion, but whenever we\n * have a lot of affine points to accumulate, we can amortize the cost\n * by applying Montgomery's batch inversion approach. As a result,\n * asymptotic[!] per-point cost for addition is as small as 5M+1S. For\n * comparison, ptype##_dadd_affine takes 8M+5S. In practice, all things\n * considered, the improvement coefficient varies from 60% to 85%\n * depending on platform and curve.\n *\n * THIS IMPLEMENTATION IS *NOT* CONSTANT-TIME. 
[But if there is an\n * application that requires constant time-ness, speak up!]\n */\n\n/*\n * Calculate λ's numerator and denominator.\n *\n * input:\tA\tx1\ty1\t-\n *\t\tB\tx2\ty2\t-\n * output:\n * if A!=B:\tA\tx1\ty1\t(x2-x1)*mul_acc\n *\t\tB\tx2+x1\ty2-y1\t(x2-x1)\n *\n * if A==B:\tA\tx\ty\t2y*mul_acc\n *\t\tB\t2x\t3*x^2\t2y\n *\n * if A==-B:\tA\t0\t0\t1*mul_acc\n *\t\tB\t0\t3*x^2\t0\n */\n#define HEAD(ptype, bits, field, one) \\\nstatic void ptype##_head(ptype AB[2], const vec##bits mul_acc) \\\n{ \\\n    ptype *A = AB, *B = AB+1; \\\n    limb_t inf = vec_is_zero(A, sizeof(ptype##_affine)) | \\\n                 vec_is_zero(B, sizeof(ptype##_affine));  \\\n    static const vec##bits zero = { 0 }; \\\n\\\n    sub_##field(B->Z, B->X, A->X);\t\t/* X2-X1  */ \\\n    add_##field(B->X, B->X, A->X);\t\t/* X2+X1  */ \\\n    add_##field(A->Z, B->Y, A->Y);\t\t/* Y2+Y1  */ \\\n    sub_##field(B->Y, B->Y, A->Y);\t\t/* Y2-Y1  */ \\\n    if (vec_is_zero(B->Z, sizeof(B->Z))) {\t/* X2==X1 */ \\\n        inf = vec_is_zero(A->Z, sizeof(A->Z));\t\\\n        vec_select(B->X, A->Z, B->X, sizeof(B->X), inf); \\\n        sqr_##field(B->Y, A->X);\t\t\\\n        mul_by_3_##field(B->Y, B->Y);\t\t/* 3*X1^2 */ \\\n        vec_copy(B->Z, A->Z, sizeof(B->Z));\t/* 2*Y1   */ \\\n    }\t\t\t\t\t\t/* B->Y is numerator    */ \\\n\t\t\t\t\t\t/* B->Z is denominator  */ \\\n    vec_select(A->X, B->X, A->X, sizeof(A->X), inf); \\\n    vec_select(A->Y, A->Z, A->Y, sizeof(A->Y), inf); \\\n    vec_select(A->Z, one,  B->Z, sizeof(A->Z), inf); \\\n    vec_select(B->Z, zero, B->Z, sizeof(B->Z), inf); \\\n    if (mul_acc != NULL) \\\n        mul_##field(A->Z, A->Z, mul_acc);\t/* chain multiplication */\\\n}\n\n/*\n * Calculate λ and resulting coordinates.\n *\n * input:\tA\t\tx1\t\t\ty1\t\t-\n *\t\tB\t\tx2+x1\t\t\tnumerator\t-\n * \t\tlambda\t\t1/denominator\n * output:\tD\t\tx3=(num/den)^2-(x2+x1)\ty3=(num/den)(x1-x3)-y1\n */\n#define TAIL(ptype, bits, field, one) \\\nstatic void ptype##_tail(ptype *D, 
ptype AB[2], vec##bits lambda) \\\n{ \\\n    ptype *A = AB, *B = AB+1; \\\n    vec##bits llambda; \\\n    limb_t inf = vec_is_zero(B->Z, sizeof(B->Z)); \\\n\\\n    mul_##field(lambda, lambda, B->Y);\t\t/* λ = (Y2-Y1)/(X2-X1)  */ \\\n\t\t\t\t\t\t/* alt. 3*X1^2/2*Y1     */ \\\n    sqr_##field(llambda, lambda); \\\n    sub_##field(D->X, llambda, B->X);\t\t/* X3 = λ^2-X1-X2       */ \\\n\\\n    sub_##field(D->Y, A->X, D->X);   \\\n    mul_##field(D->Y, D->Y, lambda); \\\n    sub_##field(D->Y, D->Y, A->Y);\t\t/* Y3 = λ*(X1-X3)-Y1    */ \\\n\\\n    vec_select(D->X, A->X, D->X, 2*sizeof(D->X), inf); \\\n    vec_select(B->Z, one, B->Z, sizeof(B->Z), inf); \\\n}\n\n/*\n * |points[]| is volatile buffer with |X|s and |Y|s initially holding\n * input affine coordinates, and with |Z|s being used as additional\n * temporary storage [unrelated to Jacobian coordinates]. |sum| is\n * in-/output, initialize to infinity accordingly.\n */\n#define ADDITION_BTREE(prefix, ptype, bits, field, one) \\\nHEAD(ptype, bits, field, one) \\\nTAIL(ptype, bits, field, one) \\\nstatic void ptype##s_accumulate(ptype *sum, ptype points[], size_t n) \\\n{ \\\n    ptype *dst; \\\n    void *mul_acc; \\\n    size_t i; \\\n\\\n    while (n >= 16) { \\\n        if (n & 1) \\\n            ptype##_dadd_affine(sum, sum, (const ptype##_affine *)points++); \\\n        n /= 2; \\\n        for (mul_acc = NULL, i = n; i--; mul_acc = points->Z, points += 2) \\\n            ptype##_head(points, mul_acc); \\\n\\\n        reciprocal_##field(points[-2].Z, points[-2].Z); /* 1/∏ Zi */ \\\n\\\n        for (dst = points, i = n; --i;) { \\\n            dst--; points -= 2; \\\n            mul_##field(points[-2].Z, points[0].Z, points[-2].Z); \\\n            ptype##_tail(dst, points, points[-2].Z); \\\n            mul_##field(points[-2].Z, points[0].Z, points[1].Z); \\\n        } \\\n        dst--; points -= 2; \\\n        ptype##_tail(dst, points, points[0].Z); \\\n        points = dst; \\\n    } \\\n    while (n--) \\\n    
    ptype##_dadd_affine(sum, sum, (const ptype##_affine *)points++); \\\n} \\\n\\\nvoid prefix##s_add(ptype *sum, const ptype##_affine *const points[], \\\n                               size_t npoints) \\\n{ \\\n    const size_t stride = SCRATCH_LIMIT / sizeof(ptype); \\\n    ptype *scratch = alloca((npoints > stride ? stride : npoints) * \\\n                            sizeof(ptype)); \\\n    const ptype##_affine *point = NULL; \\\n\\\n    vec_zero(sum, sizeof(*sum)); \\\n    while (npoints) { \\\n        size_t i, j = npoints > stride ? stride : npoints; \\\n        for (i=0; i<j; i++) { \\\n            point = *points ? *points++ : point+1; \\\n            vec_copy(&scratch[i], point, sizeof(*point)); \\\n        } \\\n        ptype##s_accumulate(sum, scratch, j); \\\n        npoints -= j; \\\n    } \\\n}\n\n#ifndef SCRATCH_LIMIT\n# ifdef __wasm__\n#  define SCRATCH_LIMIT (45 * 1024)\n# else\n   /* Performance with 144K scratch is within 1-2-3% from optimal */\n#  define SCRATCH_LIMIT (144 * 1024)\n# endif\n#endif\n\nADDITION_BTREE(blst_p1, POINTonE1, 384, fp, BLS12_381_Rx.p2)\n\nADDITION_BTREE(blst_p2, POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\n"
  },
  {
    "path": "src/bytes.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_BYTES_H__\n#define __BLS12_381_ASM_BYTES_H__\n\nstatic inline void bytes_zero(unsigned char *a, size_t num)\n{\n    size_t i;\n\n    for (i = 0; i < num; i++)\n        a[i] = 0;\n}\n\nstatic inline void limbs_from_be_bytes(limb_t *restrict ret,\n                                       const unsigned char *in, size_t n)\n{\n    limb_t limb = 0;\n\n    while(n--) {\n        limb <<= 8;\n        limb |= *in++;\n        /*\n         * 'if (n % sizeof(limb_t) == 0)' is omitted because it's cheaper\n         * to perform redundant stores than to pay penalty for\n         * mispredicted branch. Besides, some compilers unroll the\n         * loop and remove redundant stores to 'restrict'-ed storage...\n         */\n        ret[n / sizeof(limb_t)] = limb;\n    }\n}\n\nstatic inline void be_bytes_from_limbs(unsigned char *out, const limb_t *in,\n                                       size_t n)\n{\n    limb_t limb;\n\n    while(n--) {\n        limb = in[n / sizeof(limb_t)];\n        *out++ = (unsigned char)(limb >> (8 * (n % sizeof(limb_t))));\n    }\n}\n\nstatic inline void limbs_from_le_bytes(limb_t *restrict ret,\n                                       const unsigned char *in, size_t n)\n{\n    limb_t limb = 0;\n\n    while(n--) {\n        limb <<= 8;\n        limb |= in[n];\n        /*\n         * 'if (n % sizeof(limb_t) == 0)' is omitted because it's cheaper\n         * to perform redundant stores than to pay penalty for\n         * mispredicted branch. 
Besides, some compilers unroll the\n         * loop and remove redundant stores to 'restrict'-ed storage...\n         */\n        ret[n / sizeof(limb_t)] = limb;\n    }\n}\n\nstatic inline void le_bytes_from_limbs(unsigned char *out, const limb_t *in,\n                                       size_t n)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n    limb_t limb;\n    size_t i, j, r;\n\n    if ((uptr_t)out == (uptr_t)in && is_endian.little)\n        return;\n\n    r = n % sizeof(limb_t);\n    n /= sizeof(limb_t);\n\n    for(i = 0; i < n; i++) {\n        for (limb = in[i], j = 0; j < sizeof(limb_t); j++, limb >>= 8)\n            *out++ = (unsigned char)limb;\n    }\n    if (r) {\n        for (limb = in[i], j = 0; j < r; j++, limb >>= 8)\n            *out++ = (unsigned char)limb;\n    }\n}\n\nstatic inline char hex_from_nibble(unsigned char nibble)\n{\n    int mask = (9 - (nibble &= 0xf)) >> 31;\n    return (char)(nibble + ((('a'-10) & mask) | ('0' & ~mask)));\n}\n\nstatic unsigned char nibble_from_hex(char c)\n{\n    int mask, ret;\n\n    mask = (('a'-c-1) & (c-1-'f')) >> 31;\n    ret  = (10 + c - 'a') & mask;\n    mask = (('A'-c-1) & (c-1-'F')) >> 31;\n    ret |= (10 + c - 'A') & mask;\n    mask = (('0'-c-1) & (c-1-'9')) >> 31;\n    ret |= (c - '0') & mask;\n    mask = ((ret-1) & ~mask) >> 31;\n    ret |= 16 & mask;\n\n    return (unsigned char)ret;\n}\n\nstatic void bytes_from_hexascii(unsigned char *ret, size_t sz, const char *hex)\n{\n    size_t len;\n    unsigned char b = 0;\n\n    if (hex[0]=='0' && (hex[1]=='x' || hex[1]=='X'))\n        hex += 2;\n\n    for (len = 0; len<2*sz && nibble_from_hex(hex[len])<16; len++) ;\n\n    bytes_zero(ret, sz);\n\n    while(len--) {\n        b <<= 4;\n        b |= nibble_from_hex(*hex++);\n        if (len % 2 == 0)\n            ret[len / 2] = b;\n    }\n}\n\nstatic void limbs_from_hexascii(limb_t *ret, size_t sz, const char *hex)\n{\n    size_t len;\n    limb_t limb = 0;\n\n    if 
(hex[0]=='0' && (hex[1]=='x' || hex[1]=='X'))\n        hex += 2;\n\n    for (len = 0; len<2*sz && nibble_from_hex(hex[len])<16; len++) ;\n\n    vec_zero(ret, sz);\n\n    while(len--) {\n        limb <<= 4;\n        limb |= nibble_from_hex(*hex++);\n        if (len % (2*sizeof(limb_t)) == 0)\n            ret[len / (2*sizeof(limb_t))] = limb;\n    }\n}\n\n#endif\n"
  },
  {
    "path": "src/client_min_pk.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"keygen.c\"\n#include \"e2.c\"\n#include \"hash_to_field.c\"\n#include \"map_to_g2.c\"\n#include \"e1.c\"\n#include \"exp.c\"\n#include \"sqrt.c\"\n#include \"recip.c\"\n#include \"consts.c\"\n#include \"vect.c\"\n#include \"exports.c\"\n"
  },
  {
    "path": "src/client_min_sig.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"keygen.c\"\n#include \"e1.c\"\n#include \"hash_to_field.c\"\n#include \"map_to_g1.c\"\n#include \"e2.c\"\n#include \"exp.c\"\n#include \"sqrt.c\"\n#include \"recip.c\"\n#include \"consts.c\"\n#include \"vect.c\"\n#include \"exports.c\"\n"
  },
  {
    "path": "src/consts.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"consts.h\"\n\n/* z = -0xd201000000010000 */\nconst vec384 BLS12_381_P = {    /* (z-1)^2 * (z^4 - z^2 + 1)/3 + z */\n    TO_LIMB_T(0xb9feffffffffaaab), TO_LIMB_T(0x1eabfffeb153ffff),\n    TO_LIMB_T(0x6730d2a0f6b0f624), TO_LIMB_T(0x64774b84f38512bf),\n    TO_LIMB_T(0x4b1ba7b6434bacd7), TO_LIMB_T(0x1a0111ea397fe69a)\n};\nconst limb_t BLS12_381_p0 = (limb_t)0x89f3fffcfffcfffd;  /* -1/P */\n\nconst radix384 BLS12_381_Rx = { /* (1<<384)%P, \"radix\", one-in-Montgomery */\n  { { ONE_MONT_P },\n    { 0 } }\n};\n\nconst vec384 BLS12_381_RR = {   /* (1<<768)%P, \"radix\"^2, to-Montgomery */\n    TO_LIMB_T(0xf4df1f341c341746), TO_LIMB_T(0x0a76e6a609d104f1),\n    TO_LIMB_T(0x8de5476c4c95b6d5), TO_LIMB_T(0x67eb88a9939d83c0),\n    TO_LIMB_T(0x9a793e85b519952d), TO_LIMB_T(0x11988fe592cae3aa)\n};\n\nconst vec256 BLS12_381_r = {    /* z^4 - z^2 + 1, group order */\n    TO_LIMB_T(0xffffffff00000001), TO_LIMB_T(0x53bda402fffe5bfe),\n    TO_LIMB_T(0x3339d80809a1d805), TO_LIMB_T(0x73eda753299d7d48)\n};\n\nconst vec256 BLS12_381_rRR = {  /* (1<<512)%r, \"radix\"^2, to-Montgomery */\n    TO_LIMB_T(0xc999e990f3f29c6d), TO_LIMB_T(0x2b6cedcb87925c23),\n    TO_LIMB_T(0x05d314967254398f), TO_LIMB_T(0x0748d9d99f59ff11)\n};\n"
  },
  {
    "path": "src/consts.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_CONST_H__\n#define __BLS12_381_ASM_CONST_H__\n#include \"vect.h\"\n\nextern const vec384 BLS12_381_P;\nextern const limb_t BLS12_381_p0;\nstatic const limb_t p0 = (limb_t)0x89f3fffcfffcfffd;  /* -1/P */\ntypedef union { vec384 p12[12]; vec384x p2; vec384 p; } radix384;\nextern const radix384 BLS12_381_Rx; /* (1<<384)%P, \"radix\", one-in-Montgomery */\nextern const vec384 BLS12_381_RR;   /* (1<<768)%P, \"radix\"^2, to-Montgomery   */\n\n#define ONE_MONT_P TO_LIMB_T(0x760900000002fffd), \\\n                   TO_LIMB_T(0xebf4000bc40c0002), \\\n                   TO_LIMB_T(0x5f48985753c758ba), \\\n                   TO_LIMB_T(0x77ce585370525745), \\\n                   TO_LIMB_T(0x5c071a97a256ec6d), \\\n                   TO_LIMB_T(0x15f65ec3fa80e493)\n\n#define ZERO_384 (BLS12_381_Rx.p2[1])\n\nextern const vec256 BLS12_381_r;    /* order */\nstatic const limb_t r0 = (limb_t)0xfffffffeffffffff;  /* -1/r */\nextern const vec256 BLS12_381_rRR;  /* (1<<512)%r, \"radix\"^2, to-Montgomery   */\n\n#endif\n"
  },
  {
    "path": "src/cpuid.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#if (defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_C)) && !defined(_WIN32)\n__attribute__((visibility(\"hidden\")))\n#endif\nint __blst_platform_cap = 0;\n\n#if defined(__x86_64__) || defined(__x86_64) || (defined(_M_X64) && !defined(_M_ARM64EC))\n\n# if defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_C)\nstatic void __cpuidex(int info[4], int func, int sub)\n{\n    int eax, ebx, ecx, edx;\n\n    __asm__(\"cpuid\" : \"=a\"(eax), \"=b\"(ebx), \"=c\"(ecx), \"=d\"(edx)\n                    : \"a\"(func), \"c\"(sub));\n\n    info[0] = eax;\n    info[1] = ebx;\n    info[2] = ecx;\n    info[3] = edx;\n}\n# else\n#  include <intrin.h>\n# endif\n\n# if defined(__GNUC__) || defined(__clang__)\n__attribute__((constructor))\n# endif\nstatic int __blst_cpuid(void)\n{\n    int info[4], cap = 0;\n\n    __cpuidex(info, 0, 0);\n    if (info[0] > 6) {\n        __cpuidex(info, 7, 0);\n        cap |= (info[1]>>19) & 1; /* ADX */\n        cap |= (info[1]>>28) & 2; /* SHA */\n    }\n\n    __blst_platform_cap = cap;\n\n    return 0;\n}\n\n# if defined(_MSC_VER) && !defined(__clang__) && !defined(__BLST_DLL_MAIN__)\n#  pragma section(\".CRT$XCU\",read)\n__declspec(allocate(\".CRT$XCU\")) static int (*p)(void) = __blst_cpuid;\n# elif defined(__SUNPRO_C)\n#  pragma init(__blst_cpuid)\n# endif\n\n#elif defined(__aarch64__) || defined(__aarch64) || defined(_M_ARM64) || defined(_M_ARM64EC)\n\n# if defined(__linux__) && (defined(__GNUC__) || defined(__clang__))\nextern unsigned long getauxval(unsigned long type) __attribute__ ((weak));\n\n__attribute__((constructor))\nstatic int __blst_cpuid(void)\n{\n    int cap = 0;\n\n    if (getauxval) {\n        unsigned long hwcap_ce = getauxval(16);\n        cap = (hwcap_ce>>6) & 1; /* SHA256 */\n    }\n\n    __blst_platform_cap = cap;\n\n    return 
0;\n}\n# elif defined(__APPLE__) && (defined(__GNUC__) || defined(__clang__))\n__attribute__((constructor))\nstatic int __blst_cpuid()\n{\n    __blst_platform_cap = 1; /* SHA256 */\n    return 0;\n}\n# elif defined(__FreeBSD__) && __FreeBSD__ >= 12\n#  include <sys/auxv.h>\n__attribute__((constructor))\nstatic int __blst_cpuid()\n{\n    unsigned long cap;\n\n    if (elf_aux_info(AT_HWCAP, &cap, sizeof(cap)) == 0)\n        __blst_platform_cap = (cap & HWCAP_SHA2) != 0;\n\n    return 0;\n}\n# elif defined(_WIN64)\nint IsProcessorFeaturePresent(int);\n\n#  if defined(__GNUC__) || defined(__clang__)\n__attribute__((constructor))\n#  endif\nstatic int __blst_cpuid(void)\n{\n    __blst_platform_cap = IsProcessorFeaturePresent(30); /* AES, SHA1, SHA2 */\n\n    return 0;\n}\n\n#  if defined(_MSC_VER) && !defined(__clang__) && !defined(__BLST_DLL_MAIN__)\n#   pragma section(\".CRT$XCU\",read)\n__declspec(allocate(\".CRT$XCU\")) static int (*p)(void) = __blst_cpuid;\n#  endif\n# endif\n\n#endif\n\n#if defined(_WIN64) && defined(__BLST_DLL_MAIN__)\n# define IsProcessorFeaturePresent mask_IsProcessorFeaturePresent\n# define WIN32_LEAN_AND_MEAN\n# include <windows.h>\n\nBOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD dwReason, LPVOID lpvReserved)\n{\n    if (dwReason == DLL_PROCESS_ATTACH) {\n        DisableThreadLibraryCalls(hinstDLL);\n        __blst_cpuid();\n    }\n\n    return TRUE;\n\n    (void)lpvReserved;\n}\n\n# if defined(_MSC_VER)\n/*\n * Even though we don't have memcpy/memset anywhere, MSVC compiler\n * generates calls to them as it recognizes corresponding patterns.\n */\n#pragma function(memcpy)\nvoid *memcpy(unsigned char *dst, const unsigned char *src, size_t n)\n{\n    void *ret = dst;\n\n    while(n--)\n        *dst++ = *src++;\n\n    return ret;\n}\n\n#pragma function(memset)\nvoid *memset(unsigned char *dst, int c, size_t n)\n{\n    void *ret = dst;\n\n    while(n--)\n        *dst++ = (unsigned char)c;\n\n    return ret;\n}\n# endif\n#endif\n"
  },
  {
    "path": "src/e1.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"point.h\"\n#include \"fields.h\"\n#include \"errors.h\"\n\n/*\n * y^2 = x^3 + B\n */\nstatic const vec384 B_E1 = {        /* (4 << 384) % P */\n    TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a),\n    TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7),\n    TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e)\n};\n\nconst POINTonE1 BLS12_381_G1 = {    /* generator point [in Montgomery] */\n  /* (0x17f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905\n   *    a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb << 384) % P */\n  { TO_LIMB_T(0x5cb38790fd530c16), TO_LIMB_T(0x7817fc679976fff5),\n    TO_LIMB_T(0x154f95c7143ba1c1), TO_LIMB_T(0xf0ae6acdf3d0e747),\n    TO_LIMB_T(0xedce6ecc21dbf440), TO_LIMB_T(0x120177419e0bfb75) },\n  /* (0x08b3f481e3aaa0f1a09e30ed741d8ae4fcf5e095d5d00af6\n   *    00db18cb2c04b3edd03cc744a2888ae40caa232946c5e7e1 << 384) % P */\n  { TO_LIMB_T(0xbaac93d50ce72271), TO_LIMB_T(0x8c22631a7918fd8e),\n    TO_LIMB_T(0xdd595f13570725ce), TO_LIMB_T(0x51ac582950405194),\n    TO_LIMB_T(0x0e1c8c3fad0059c0), TO_LIMB_T(0x0bbc3efc5008a26a) },\n  { ONE_MONT_P }\n};\n\nconst POINTonE1 BLS12_381_NEG_G1 = { /* negative generator [in Montgomery] */\n  /* (0x17f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905\n   *    a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb << 384) % P */\n  { TO_LIMB_T(0x5cb38790fd530c16), TO_LIMB_T(0x7817fc679976fff5),\n    TO_LIMB_T(0x154f95c7143ba1c1), TO_LIMB_T(0xf0ae6acdf3d0e747),\n    TO_LIMB_T(0xedce6ecc21dbf440), TO_LIMB_T(0x120177419e0bfb75) },\n  /* (0x114d1d6855d545a8aa7d76c8cf2e21f267816aef1db507c9\n   *    6655b9d5caac42364e6f38ba0ecb751bad54dcd6b939c2ca << 384) % P */\n  { TO_LIMB_T(0xff526c2af318883a), TO_LIMB_T(0x92899ce4383b0270),\n    TO_LIMB_T(0x89d7738d9fa9d055), TO_LIMB_T(0x12caf35ba344c12a),\n    
TO_LIMB_T(0x3cff1b76964b5317), TO_LIMB_T(0x0e44d2ede9774430) },\n  { ONE_MONT_P }\n};\n\nstatic inline void mul_by_b_onE1(vec384 out, const vec384 in)\n{   lshift_fp(out, in, 2);   }\n\nstatic inline void mul_by_4b_onE1(vec384 out, const vec384 in)\n{   lshift_fp(out, in, 4);   }\n\nstatic void POINTonE1_cneg(POINTonE1 *p, bool_t cbit)\n{   cneg_fp(p->Y, p->Y, cbit);   }\n\nvoid blst_p1_cneg(POINTonE1 *a, int cbit)\n{   POINTonE1_cneg(a, is_zero(cbit) ^ 1);   }\n\nstatic void POINTonE1_from_Jacobian(POINTonE1 *out, const POINTonE1 *in)\n{\n    vec384 Z, ZZ;\n    limb_t inf = vec_is_zero(in->Z, sizeof(in->Z));\n\n    reciprocal_fp(Z, in->Z);                            /* 1/Z   */\n\n    sqr_fp(ZZ, Z);\n    mul_fp(out->X, in->X, ZZ);                          /* X = X/Z^2 */\n\n    mul_fp(ZZ, ZZ, Z);\n    mul_fp(out->Y, in->Y, ZZ);                          /* Y = Y/Z^3 */\n\n    vec_select(out->Z, in->Z, BLS12_381_G1.Z,\n                       sizeof(BLS12_381_G1.Z), inf);    /* Z = inf ? 0 : 1 */\n}\n\nvoid blst_p1_from_jacobian(POINTonE1 *out, const POINTonE1 *a)\n{   POINTonE1_from_Jacobian(out, a);   }\n\nstatic void POINTonE1_to_affine(POINTonE1_affine *out, const POINTonE1 *in)\n{\n    POINTonE1 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) {\n        POINTonE1_from_Jacobian(&p, in);\n        in = &p;\n    }\n    vec_copy(out, in, sizeof(*out));\n}\n\nvoid blst_p1_to_affine(POINTonE1_affine *out, const POINTonE1 *a)\n{   POINTonE1_to_affine(out, a);   }\n\nvoid blst_p1_from_affine(POINTonE1 *out, const POINTonE1_affine *a)\n{\n    vec_copy(out, a, sizeof(*a));\n    vec_select(out->Z, a->X, BLS12_381_Rx.p, sizeof(out->Z),\n                       vec_is_zero(a, sizeof(*a)));\n}\n\nstatic bool_t POINTonE1_affine_on_curve(const POINTonE1_affine *p)\n{\n    vec384 XXX, YY;\n\n    sqr_fp(XXX, p->X);\n    mul_fp(XXX, XXX, p->X);                             /* X^3 */\n    add_fp(XXX, XXX, B_E1);                             /* X^3 + B */\n\n    
sqr_fp(YY, p->Y);                                   /* Y^2 */\n\n    return vec_is_equal(XXX, YY, sizeof(XXX));\n}\n\nint blst_p1_affine_on_curve(const POINTonE1_affine *p)\n{   return (int)(POINTonE1_affine_on_curve(p) | vec_is_zero(p, sizeof(*p)));   }\n\nstatic bool_t POINTonE1_on_curve(const POINTonE1 *p)\n{\n    vec384 XXX, YY, BZ6;\n    limb_t inf = vec_is_zero(p->Z, sizeof(p->Z));\n\n    sqr_fp(BZ6, p->Z);\n    mul_fp(BZ6, BZ6, p->Z);\n    sqr_fp(BZ6, BZ6);                                   /* Z^6 */\n    mul_by_b_onE1(BZ6, BZ6);                            /* B*Z^6 */\n\n    sqr_fp(XXX, p->X);\n    mul_fp(XXX, XXX, p->X);                             /* X^3 */\n    add_fp(XXX, XXX, BZ6);                              /* X^3 + B*Z^6 */\n\n    sqr_fp(YY, p->Y);                                   /* Y^2 */\n\n    return vec_is_equal(XXX, YY, sizeof(XXX)) | inf;\n}\n\nint blst_p1_on_curve(const POINTonE1 *p)\n{   return (int)POINTonE1_on_curve(p);   }\n\nstatic limb_t POINTonE1_affine_Serialize_BE(unsigned char out[96],\n                                            const POINTonE1_affine *in)\n{\n    vec384 temp;\n\n    from_fp(temp, in->X);\n    be_bytes_from_limbs(out, temp, sizeof(temp));\n\n    from_fp(temp, in->Y);\n    be_bytes_from_limbs(out + 48, temp, sizeof(temp));\n\n    return sgn0_pty_mod_384(temp, BLS12_381_P);\n}\n\nvoid blst_p1_affine_serialize(unsigned char out[96],\n                              const POINTonE1_affine *in)\n{\n    if (vec_is_zero(in->X, 2*sizeof(in->X))) {\n        bytes_zero(out, 96);\n        out[0] = 0x40;    /* infinity bit */\n    } else {\n        (void)POINTonE1_affine_Serialize_BE(out, in);\n    }\n}\n\nstatic limb_t POINTonE1_Serialize_BE(unsigned char out[96],\n                                     const POINTonE1 *in)\n{\n    POINTonE1 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) {\n        POINTonE1_from_Jacobian(&p, in);\n        in = &p;\n    }\n\n    return POINTonE1_affine_Serialize_BE(out, 
(const POINTonE1_affine *)in);\n}\n\nstatic void POINTonE1_Serialize(unsigned char out[96], const POINTonE1 *in)\n{\n    if (vec_is_zero(in->Z, sizeof(in->Z))) {\n        bytes_zero(out, 96);\n        out[0] = 0x40;    /* infinity bit */\n    } else {\n        (void)POINTonE1_Serialize_BE(out, in);\n    }\n}\n\nvoid blst_p1_serialize(unsigned char out[96], const POINTonE1 *in)\n{   POINTonE1_Serialize(out, in);   }\n\nstatic limb_t POINTonE1_affine_Compress_BE(unsigned char out[48],\n                                           const POINTonE1_affine *in)\n{\n    vec384 temp;\n\n    from_fp(temp, in->X);\n    be_bytes_from_limbs(out, temp, sizeof(temp));\n\n    return sgn0_pty_mont_384(in->Y, BLS12_381_P, p0);\n}\n\nvoid blst_p1_affine_compress(unsigned char out[48], const POINTonE1_affine *in)\n{\n    if (vec_is_zero(in->X, 2*sizeof(in->X))) {\n        bytes_zero(out, 48);\n        out[0] = 0xc0;    /* compressed and infinity bits */\n    } else {\n        limb_t sign = POINTonE1_affine_Compress_BE(out, in);\n        out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4));\n    }\n}\n\nstatic limb_t POINTonE1_Compress_BE(unsigned char out[48],\n                                    const POINTonE1 *in)\n{\n    POINTonE1 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) {\n        POINTonE1_from_Jacobian(&p, in);\n        in = &p;\n    }\n\n    return POINTonE1_affine_Compress_BE(out, (const POINTonE1_affine *)in);\n}\n\nvoid blst_p1_compress(unsigned char out[48], const POINTonE1 *in)\n{\n    if (vec_is_zero(in->Z, sizeof(in->Z))) {\n        bytes_zero(out, 48);\n        out[0] = 0xc0;    /* compressed and infinity bits */\n    } else {\n        limb_t sign = POINTonE1_Compress_BE(out, in);\n        out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4));\n    }\n}\n\nstatic limb_t POINTonE1_Uncompress_BE(POINTonE1_affine *out,\n                                      const unsigned char in[48])\n{\n    POINTonE1_affine ret;\n    vec384 temp;\n\n    
limbs_from_be_bytes(ret.X, in, sizeof(ret.X));\n    /* clear top 3 bits in case caller was conveying some information there */\n    ret.X[sizeof(ret.X)/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3;\n    add_fp(temp, ret.X, ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X, sizeof(temp)))\n        return (limb_t)0 - BLST_BAD_ENCODING;\n    mul_fp(ret.X, ret.X, BLS12_381_RR);\n\n    sqr_fp(ret.Y, ret.X);\n    mul_fp(ret.Y, ret.Y, ret.X);\n    add_fp(ret.Y, ret.Y, B_E1);                         /* X^3 + B */\n    if (!sqrt_fp(ret.Y, ret.Y))\n        return (limb_t)0 - BLST_POINT_NOT_ON_CURVE;\n\n    vec_copy(out, &ret, sizeof(ret));\n\n    return sgn0_pty_mont_384(out->Y, BLS12_381_P, p0);\n}\n\nstatic BLST_ERROR POINTonE1_Uncompress_Z(POINTonE1_affine *out,\n                                         const unsigned char in[48])\n{\n    unsigned char in0 = in[0];\n    limb_t sgn0_pty;\n\n    if ((in0 & 0x80) == 0)      /* compressed bit */\n        return BLST_BAD_ENCODING;\n\n    if (in0 & 0x40) {           /* infinity bit */\n        if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 47)) {\n            vec_zero(out, sizeof(*out));\n            return BLST_SUCCESS;\n        } else {\n            return BLST_BAD_ENCODING;\n        }\n    }\n\n    sgn0_pty = POINTonE1_Uncompress_BE(out, in);\n\n    if (sgn0_pty > 3)\n        return (BLST_ERROR)(0 - sgn0_pty); /* POINT_NOT_ON_CURVE */\n\n    sgn0_pty >>= 1; /* skip over parity bit */\n    sgn0_pty ^= (in0 & 0x20) >> 5;\n    cneg_fp(out->Y, out->Y, sgn0_pty);\n\n    /* (0,±2) is not in group, but application might want to ignore? */\n    return vec_is_zero(out->X, sizeof(out->X)) ? 
BLST_POINT_NOT_IN_GROUP\n                                               : BLST_SUCCESS;\n}\n\nBLST_ERROR blst_p1_uncompress(POINTonE1_affine *out, const unsigned char in[48])\n{   return POINTonE1_Uncompress_Z(out, in);   }\n\nstatic BLST_ERROR POINTonE1_Deserialize_BE(POINTonE1_affine *out,\n                                           const unsigned char in[96])\n{\n    POINTonE1_affine ret;\n    vec384 temp;\n\n    limbs_from_be_bytes(ret.X, in, sizeof(ret.X));\n    limbs_from_be_bytes(ret.Y, in + 48, sizeof(ret.Y));\n\n    /* clear top 3 bits in case caller was conveying some information there */\n    ret.X[sizeof(ret.X)/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3;\n    add_fp(temp, ret.X, ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X, sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    add_fp(temp, ret.Y, ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.Y, sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    mul_fp(ret.X, ret.X, BLS12_381_RR);\n    mul_fp(ret.Y, ret.Y, BLS12_381_RR);\n\n    if (!POINTonE1_affine_on_curve(&ret))\n        return BLST_POINT_NOT_ON_CURVE;\n\n    vec_copy(out, &ret, sizeof(ret));\n\n    /* (0,±2) is not in group, but application might want to ignore? */\n    return vec_is_zero(out->X, sizeof(out->X)) ? 
BLST_POINT_NOT_IN_GROUP\n                                               : BLST_SUCCESS;\n}\n\nstatic BLST_ERROR POINTonE1_Deserialize_Z(POINTonE1_affine *out,\n                                          const unsigned char in[96])\n{\n    unsigned char in0 = in[0];\n\n    if ((in0 & 0xe0) == 0)\n        return POINTonE1_Deserialize_BE(out, in);\n\n    if (in0 & 0x80)             /* compressed bit */\n        return POINTonE1_Uncompress_Z(out, in);\n\n    if (in0 & 0x40) {           /* infinity bit */\n        if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 95)) {\n            vec_zero(out, sizeof(*out));\n            return BLST_SUCCESS;\n        }\n    }\n\n    return BLST_BAD_ENCODING;\n}\n\nBLST_ERROR blst_p1_deserialize(POINTonE1_affine *out,\n                               const unsigned char in[96])\n{   return POINTonE1_Deserialize_Z(out, in);   }\n\n#include \"ec_ops.h\"\nPOINT_DADD_IMPL(POINTonE1, 384, fp)\nPOINT_DADD_AFFINE_IMPL_A0(POINTonE1, 384, fp, BLS12_381_Rx.p)\nPOINT_ADD_IMPL(POINTonE1, 384, fp)\nPOINT_ADD_AFFINE_IMPL(POINTonE1, 384, fp, BLS12_381_Rx.p)\nPOINT_DOUBLE_IMPL_A0(POINTonE1, 384, fp)\nPOINT_IS_EQUAL_IMPL(POINTonE1, 384, fp)\n\nvoid blst_p1_add(POINTonE1 *out, const POINTonE1 *a, const POINTonE1 *b)\n{   POINTonE1_add(out, a, b);   }\n\nvoid blst_p1_add_or_double(POINTonE1 *out, const POINTonE1 *a,\n                                           const POINTonE1 *b)\n{   POINTonE1_dadd(out, a, b, NULL);   }\n\nvoid blst_p1_add_affine(POINTonE1 *out, const POINTonE1 *a,\n                                        const POINTonE1_affine *b)\n{   POINTonE1_add_affine(out, a, b);   }\n\nvoid blst_p1_add_or_double_affine(POINTonE1 *out, const POINTonE1 *a,\n                                                  const POINTonE1_affine *b)\n{   POINTonE1_dadd_affine(out, a, b);   }\n\nvoid blst_p1_double(POINTonE1 *out, const POINTonE1 *a)\n{   POINTonE1_double(out, a);   }\n\nint blst_p1_is_equal(const POINTonE1 *a, const POINTonE1 *b)\n{   return 
(int)POINTonE1_is_equal(a, b);   }\n\n#include \"ec_mult.h\"\nPOINT_MULT_SCALAR_WX_IMPL(POINTonE1, 4)\nPOINT_MULT_SCALAR_WX_IMPL(POINTonE1, 5)\n\n#ifdef __BLST_PRIVATE_TESTMODE__\nPOINT_AFFINE_MULT_SCALAR_IMPL(POINTonE1)\n\nDECLARE_PRIVATE_POINTXZ(POINTonE1, 384)\nPOINT_LADDER_PRE_IMPL(POINTonE1, 384, fp)\nPOINT_LADDER_STEP_IMPL_A0(POINTonE1, 384, fp, onE1)\nPOINT_LADDER_POST_IMPL_A0(POINTonE1, 384, fp, onE1)\nPOINT_MULT_SCALAR_LADDER_IMPL(POINTonE1)\n#endif\n\nstatic const vec384 beta = {            /* such that beta^3 - 1 = 0  */\n    /* -1/2 * (1 + sqrt(-3)) = ((P-2)^(P-2)) * (1 + (P-3)^((P+1)/4)) */\n    /* (0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4\n          897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac << 384) % P */\n    TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2),\n    TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e),\n    TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741)\n};\n\nstatic void sigma(POINTonE1 *out, const POINTonE1 *in)\n{\n    vec_copy(out->X, in->X, 2*sizeof(out->X));\n    mul_fp(out->Z, in->Z, beta);\n}\n\n/* Gallant-Lambert-Vanstone, ~45% faster than POINTonE1_mult_w5 */\nstatic void POINTonE1_mult_glv(POINTonE1 *out, const POINTonE1 *in,\n                               const pow256 SK)\n{\n    union { vec256 l; pow256 s; } val;\n\n    /* SK/z^2 [in constant time] */\n\n    limbs_from_le_bytes(val.l, SK, 32);\n    div_by_zz(val.l);\n    le_bytes_from_limbs(val.s, val.l, 32);\n\n    {\n        const byte *scalars[2] = { val.s+16, val.s };\n        POINTonE1 table[2][1<<(5-1)];   /* 4.5KB */\n        size_t i;\n\n        POINTonE1_precompute_w5(table[0], in);\n        for (i = 0; i < 1<<(5-1); i++) {\n            mul_fp(table[1][i].X, table[0][i].X, beta);\n            cneg_fp(table[1][i].Y, table[0][i].Y, 1);\n            vec_copy(table[1][i].Z, table[0][i].Z, sizeof(table[1][i].Z));\n        }\n\n        POINTonE1s_mult_w5(out, NULL, 2, scalars, 128, table);\n        POINTonE1_cneg(out, 
1);\n        mul_fp(out->Z, out->Z, beta);\n        mul_fp(out->Z, out->Z, beta);\n    }\n\n    vec_zero(val.l, sizeof(val));   /* scrub the copy of SK */\n}\n\nstatic void POINTonE1_sign(POINTonE1 *out, const POINTonE1 *in, const pow256 SK)\n{\n    vec384 Z, ZZ;\n    limb_t inf;\n\n    POINTonE1_mult_glv(out, in, SK);\n\n    /* convert to affine to remove possible bias in out->Z */\n    inf = vec_is_zero(out->Z, sizeof(out->Z));\n#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION\n    flt_reciprocal_fp(Z, out->Z);                       /* 1/Z   */\n#else\n    reciprocal_fp(Z, out->Z);                           /* 1/Z   */\n#endif\n\n    sqr_fp(ZZ, Z);\n    mul_fp(out->X, out->X, ZZ);                         /* X = X/Z^2 */\n\n    mul_fp(ZZ, ZZ, Z);\n    mul_fp(out->Y, out->Y, ZZ);                         /* Y = Y/Z^3 */\n\n    vec_select(out->Z, out->Z, BLS12_381_G1.Z, sizeof(BLS12_381_G1.Z),\n                       inf);                            /* Z = inf ? 0 : 1 */\n}\n\nvoid blst_sk_to_pk_in_g1(POINTonE1 *out, const pow256 SK)\n{   POINTonE1_sign(out, &BLS12_381_G1, SK);   }\n\nvoid blst_sign_pk_in_g2(POINTonE1 *out, const POINTonE1 *msg, const pow256 SK)\n{   POINTonE1_sign(out, msg, SK);   }\n\nvoid blst_sk_to_pk2_in_g1(unsigned char out[96], POINTonE1_affine *PK,\n                          const pow256 SK)\n{\n    POINTonE1 P[1];\n\n    POINTonE1_sign(P, &BLS12_381_G1, SK);\n    if (PK != NULL)\n        vec_copy(PK, P, sizeof(*PK));\n    if (out != NULL) {\n        limb_t sgn0_pty = POINTonE1_Serialize_BE(out, P);\n        out[0] |= (sgn0_pty & 2) << 4;      /* pre-decorate */\n        out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6;\n    }\n}\n\nvoid blst_sign_pk2_in_g2(unsigned char out[96], POINTonE1_affine *sig,\n                         const POINTonE1 *hash, const pow256 SK)\n{\n    POINTonE1 P[1];\n\n    POINTonE1_sign(P, hash, SK);\n    if (sig != NULL)\n        vec_copy(sig, P, sizeof(*sig));\n    if (out != NULL) {\n        limb_t sgn0_pty = 
POINTonE1_Serialize_BE(out, P);\n        out[0] |= (sgn0_pty & 2) << 4;      /* pre-decorate */\n        out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6;\n    }\n}\n\nvoid blst_p1_mult(POINTonE1 *out, const POINTonE1 *a,\n                                  const byte *scalar, size_t nbits)\n{\n    if (nbits < 176) {\n        if (nbits)\n            POINTonE1_mult_w4(out, a, scalar, nbits);\n        else\n            vec_zero(out, sizeof(*out));\n    } else if (nbits <= 256) {\n        union { vec256 l; pow256 s; } val;\n        size_t i, j, top, mask = (size_t)0 - 1;\n\n        /* this is not about constant-time-ness, but branch optimization */\n        for (top = (nbits + 7)/8, i=0, j=0; i<sizeof(val.s);) {\n            val.s[i++] = scalar[j] & mask;\n            mask = 0 - ((i - top) >> (8*sizeof(top)-1));\n            j += 1 & mask;\n        }\n\n        if (check_mod_256(val.s, BLS12_381_r))  /* z^4 is the formal limit */\n            POINTonE1_mult_glv(out, a, val.s);\n        else    /* should never be the case, added for formal completeness */\n            POINTonE1_mult_w5(out, a, scalar, nbits);\n\n        vec_zero(val.l, sizeof(val));\n    } else {    /* should never be the case, added for formal completeness */\n        POINTonE1_mult_w5(out, a, scalar, nbits);\n    }\n}\n\nvoid blst_p1_unchecked_mult(POINTonE1 *out, const POINTonE1 *a,\n                                            const byte *scalar, size_t nbits)\n{\n    if (nbits)\n        POINTonE1_mult_w4(out, a, scalar, nbits);\n    else\n        vec_zero(out, sizeof(*out));\n}\n\nint blst_p1_affine_is_equal(const POINTonE1_affine *a,\n                            const POINTonE1_affine *b)\n{   return (int)vec_is_equal(a, b, sizeof(*a));   }\n\nint blst_p1_is_inf(const POINTonE1 *p)\n{   return (int)vec_is_zero(p->Z, sizeof(p->Z));   }\n\nconst POINTonE1 *blst_p1_generator(void)\n{   return &BLS12_381_G1;   }\n\nint blst_p1_affine_is_inf(const POINTonE1_affine *p)\n{   return (int)vec_is_zero(p, 
sizeof(*p));   }\n\nconst POINTonE1_affine *blst_p1_affine_generator(void)\n{   return (const POINTonE1_affine *)&BLS12_381_G1;   }\n\nsize_t blst_p1_sizeof(void)\n{   return sizeof(POINTonE1);   }\n\nsize_t blst_p1_affine_sizeof(void)\n{   return sizeof(POINTonE1_affine);   }\n"
  },
  {
    "path": "src/e2.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"point.h\"\n#include \"fields.h\"\n#include \"errors.h\"\n\n/*\n * y^2 = x^3 + B\n */\nstatic const vec384x B_E2 = {       /* 4 + 4*i */\n  { TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a),\n    TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7),\n    TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e) },\n  { TO_LIMB_T(0xaa270000000cfff3), TO_LIMB_T(0x53cc0032fc34000a),\n    TO_LIMB_T(0x478fe97a6b0a807f), TO_LIMB_T(0xb1d37ebee6ba24d7),\n    TO_LIMB_T(0x8ec9733bbf78ab2f), TO_LIMB_T(0x09d645513d83de7e) }\n};\n\nconst POINTonE2 BLS12_381_G2 = {    /* generator point [in Montgomery] */\n{ /* (0x024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02\n        b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8 << 384) % P */\n  { TO_LIMB_T(0xf5f28fa202940a10), TO_LIMB_T(0xb3f5fb2687b4961a),\n    TO_LIMB_T(0xa1a893b53e2ae580), TO_LIMB_T(0x9894999d1a3caee9),\n    TO_LIMB_T(0x6f67b7631863366b), TO_LIMB_T(0x058191924350bcd7) },\n  /* (0x13e02b6052719f607dacd3a088274f65596bd0d09920b61a\n        b5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e << 384) % P */\n  { TO_LIMB_T(0xa5a9c0759e23f606), TO_LIMB_T(0xaaa0c59dbccd60c3),\n    TO_LIMB_T(0x3bb17e18e2867806), TO_LIMB_T(0x1b1ab6cc8541b367),\n    TO_LIMB_T(0xc2b6ed0ef2158547), TO_LIMB_T(0x11922a097360edf3) }\n},\n{ /* (0x0ce5d527727d6e118cc9cdc6da2e351aadfd9baa8cbdd3a7\n        6d429a695160d12c923ac9cc3baca289e193548608b82801 << 384) % P */\n  { TO_LIMB_T(0x4c730af860494c4a), TO_LIMB_T(0x597cfa1f5e369c5a),\n    TO_LIMB_T(0xe7e6856caa0a635a), TO_LIMB_T(0xbbefb5e96e0d495f),\n    TO_LIMB_T(0x07d3a975f0ef25a2), TO_LIMB_T(0x0083fd8e7e80dae5) },\n  /* (0x0606c4a02ea734cc32acd2b02bc28b99cb3e287e85a763af\n        267492ab572e99ab3f370d275cec1da1aaa9075ff05f79be << 384) % P */\n  { TO_LIMB_T(0xadc0fc92df64b05d), 
TO_LIMB_T(0x18aa270a2b1461dc),\n    TO_LIMB_T(0x86adac6a3be4eba0), TO_LIMB_T(0x79495c4ec93da33a),\n    TO_LIMB_T(0xe7175850a43ccaed), TO_LIMB_T(0x0b2bc2a163de1bf2) },\n},\n{ { ONE_MONT_P }, { 0 } }\n};\n\nconst POINTonE2 BLS12_381_NEG_G2 = { /* negative generator [in Montgomery] */\n{ /* (0x024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02\n        b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8 << 384) % P */\n  { TO_LIMB_T(0xf5f28fa202940a10), TO_LIMB_T(0xb3f5fb2687b4961a),\n    TO_LIMB_T(0xa1a893b53e2ae580), TO_LIMB_T(0x9894999d1a3caee9),\n    TO_LIMB_T(0x6f67b7631863366b), TO_LIMB_T(0x058191924350bcd7) },\n  /* (0x13e02b6052719f607dacd3a088274f65596bd0d09920b61a\n        b5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e << 384) % P */\n  { TO_LIMB_T(0xa5a9c0759e23f606), TO_LIMB_T(0xaaa0c59dbccd60c3),\n    TO_LIMB_T(0x3bb17e18e2867806), TO_LIMB_T(0x1b1ab6cc8541b367),\n    TO_LIMB_T(0xc2b6ed0ef2158547), TO_LIMB_T(0x11922a097360edf3) }\n},\n{ /* (0x0d1b3cc2c7027888be51d9ef691d77bcb679afda66c73f17\n        f9ee3837a55024f78c71363275a75d75d86bab79f74782aa << 384) % P */\n  { TO_LIMB_T(0x6d8bf5079fb65e61), TO_LIMB_T(0xc52f05df531d63a5),\n    TO_LIMB_T(0x7f4a4d344ca692c9), TO_LIMB_T(0xa887959b8577c95f),\n    TO_LIMB_T(0x4347fe40525c8734), TO_LIMB_T(0x197d145bbaff0bb5) },\n  /* (0x13fa4d4a0ad8b1ce186ed5061789213d993923066dddaf10\n        40bc3ff59f825c78df74f2d75467e25e0f55f8a00fa030ed << 384) % P */\n  { TO_LIMB_T(0x0c3e036d209afa4e), TO_LIMB_T(0x0601d8f4863f9e23),\n    TO_LIMB_T(0xe0832636bacc0a84), TO_LIMB_T(0xeb2def362a476f84),\n    TO_LIMB_T(0x64044f659f0ee1e9), TO_LIMB_T(0x0ed54f48d5a1caa7) }\n},\n{ { ONE_MONT_P }, { 0 } }\n};\n\nstatic void mul_by_b_onE2(vec384x out, const vec384x in)\n{\n    sub_fp(out[0], in[0], in[1]);\n    add_fp(out[1], in[0], in[1]);\n    lshift_fp(out[0], out[0], 2);\n    lshift_fp(out[1], out[1], 2);\n}\n\nstatic void mul_by_4b_onE2(vec384x out, const vec384x in)\n{\n    sub_fp(out[0], in[0], in[1]);\n    add_fp(out[1], in[0], in[1]);\n   
 lshift_fp(out[0], out[0], 4);\n    lshift_fp(out[1], out[1], 4);\n}\n\nstatic void POINTonE2_cneg(POINTonE2 *p, bool_t cbit)\n{   cneg_fp2(p->Y, p->Y, cbit);   }\n\nvoid blst_p2_cneg(POINTonE2 *a, int cbit)\n{   POINTonE2_cneg(a, is_zero(cbit) ^ 1);   }\n\nstatic void POINTonE2_from_Jacobian(POINTonE2 *out, const POINTonE2 *in)\n{\n    vec384x Z, ZZ;\n    limb_t inf = vec_is_zero(in->Z, sizeof(in->Z));\n\n    reciprocal_fp2(Z, in->Z);                           /* 1/Z */\n\n    sqr_fp2(ZZ, Z);\n    mul_fp2(out->X, in->X, ZZ);                         /* X = X/Z^2 */\n\n    mul_fp2(ZZ, ZZ, Z);\n    mul_fp2(out->Y, in->Y, ZZ);                         /* Y = Y/Z^3 */\n\n    vec_select(out->Z, in->Z, BLS12_381_G2.Z,\n                       sizeof(BLS12_381_G2.Z), inf);    /* Z = inf ? 0 : 1 */\n}\n\nvoid blst_p2_from_jacobian(POINTonE2 *out, const POINTonE2 *a)\n{   POINTonE2_from_Jacobian(out, a);   }\n\nstatic void POINTonE2_to_affine(POINTonE2_affine *out, const POINTonE2 *in)\n{\n    POINTonE2 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p2, sizeof(in->Z))) {\n        POINTonE2_from_Jacobian(&p, in);\n        in = &p;\n    }\n    vec_copy(out, in, sizeof(*out));\n}\n\nvoid blst_p2_to_affine(POINTonE2_affine *out, const POINTonE2 *a)\n{   POINTonE2_to_affine(out, a);   }\n\nvoid blst_p2_from_affine(POINTonE2 *out, const POINTonE2_affine *a)\n{\n    vec_copy(out, a, sizeof(*a));\n    vec_select(out->Z, a->X, BLS12_381_Rx.p2, sizeof(out->Z),\n                       vec_is_zero(a, sizeof(*a)));\n}\n\nstatic bool_t POINTonE2_affine_on_curve(const POINTonE2_affine *p)\n{\n    vec384x XXX, YY;\n\n    sqr_fp2(XXX, p->X);\n    mul_fp2(XXX, XXX, p->X);                            /* X^3 */\n    add_fp2(XXX, XXX, B_E2);                            /* X^3 + B */\n\n    sqr_fp2(YY, p->Y);                                  /* Y^2 */\n\n    return vec_is_equal(XXX, YY, sizeof(XXX));\n}\n\nint blst_p2_affine_on_curve(const POINTonE2_affine *p)\n{   return 
(int)(POINTonE2_affine_on_curve(p) | vec_is_zero(p, sizeof(*p)));   }\n\nstatic bool_t POINTonE2_on_curve(const POINTonE2 *p)\n{\n    vec384x XXX, YY, BZ6;\n    limb_t inf = vec_is_zero(p->Z, sizeof(p->Z));\n\n    sqr_fp2(BZ6, p->Z);\n    mul_fp2(BZ6, BZ6, p->Z);\n    sqr_fp2(XXX, BZ6);                                  /* Z^6 */\n    mul_by_b_onE2(BZ6, XXX);                            /* B*Z^6 */\n\n    sqr_fp2(XXX, p->X);\n    mul_fp2(XXX, XXX, p->X);                            /* X^3 */\n    add_fp2(XXX, XXX, BZ6);                             /* X^3 + B*Z^6 */\n\n    sqr_fp2(YY, p->Y);                                  /* Y^2 */\n\n    return vec_is_equal(XXX, YY, sizeof(XXX)) | inf;\n}\n\nint blst_p2_on_curve(const POINTonE2 *p)\n{   return (int)POINTonE2_on_curve(p);   }\n\nstatic limb_t POINTonE2_affine_Serialize_BE(unsigned char out[192],\n                                            const POINTonE2_affine *in)\n{\n    vec384x temp;\n\n    from_fp(temp[1], in->X[1]);\n    be_bytes_from_limbs(out, temp[1], sizeof(temp[1]));\n    from_fp(temp[0], in->X[0]);\n    be_bytes_from_limbs(out + 48, temp[0], sizeof(temp[0]));\n\n    from_fp(temp[1], in->Y[1]);\n    be_bytes_from_limbs(out + 96, temp[1], sizeof(temp[1]));\n    from_fp(temp[0], in->Y[0]);\n    be_bytes_from_limbs(out + 144, temp[0], sizeof(temp[0]));\n\n    return sgn0_pty_mod_384x(temp, BLS12_381_P);\n}\n\nvoid blst_p2_affine_serialize(unsigned char out[192],\n                              const POINTonE2_affine *in)\n{\n    if (vec_is_zero(in->X, 2*sizeof(in->X))) {\n        bytes_zero(out, 192);\n        out[0] = 0x40;    /* infinity bit */\n    } else {\n        (void)POINTonE2_affine_Serialize_BE(out, in);\n    }\n}\n\nstatic limb_t POINTonE2_Serialize_BE(unsigned char out[192],\n                                     const POINTonE2 *in)\n{\n    POINTonE2 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p2, sizeof(in->Z))) {\n        POINTonE2_from_Jacobian(&p, in);\n        in = &p;\n    }\n\n    
return POINTonE2_affine_Serialize_BE(out, (const POINTonE2_affine *)in);\n}\n\nstatic void POINTonE2_Serialize(unsigned char out[192], const POINTonE2 *in)\n{\n    if (vec_is_zero(in->Z, sizeof(in->Z))) {\n        bytes_zero(out, 192);\n        out[0] = 0x40;    /* infinity bit */\n    } else {\n        (void)POINTonE2_Serialize_BE(out, in);\n    }\n}\n\nvoid blst_p2_serialize(unsigned char out[192], const POINTonE2 *in)\n{   POINTonE2_Serialize(out, in);   }\n\nstatic limb_t POINTonE2_affine_Compress_BE(unsigned char out[96],\n                                           const POINTonE2_affine *in)\n{\n    vec384 temp;\n\n    from_fp(temp, in->X[1]);\n    be_bytes_from_limbs(out, temp, sizeof(temp));\n    from_fp(temp, in->X[0]);\n    be_bytes_from_limbs(out + 48, temp, sizeof(temp));\n\n    return sgn0_pty_mont_384x(in->Y, BLS12_381_P, p0);\n}\n\nvoid blst_p2_affine_compress(unsigned char out[96], const POINTonE2_affine *in)\n{\n    if (vec_is_zero(in->X, 2*sizeof(in->X))) {\n        bytes_zero(out, 96);\n        out[0] = 0xc0;    /* compressed and infinity bits */\n    } else {\n        limb_t sign = POINTonE2_affine_Compress_BE(out, in);\n        out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4));\n    }\n}\n\nstatic limb_t POINTonE2_Compress_BE(unsigned char out[96],\n                                    const POINTonE2 *in)\n{\n    POINTonE2 p;\n\n    if (!vec_is_equal(in->Z, BLS12_381_Rx.p, sizeof(in->Z))) {\n        POINTonE2_from_Jacobian(&p, in);\n        in = &p;\n    }\n\n    return POINTonE2_affine_Compress_BE(out, (const POINTonE2_affine *)in);\n}\n\nvoid blst_p2_compress(unsigned char out[96], const POINTonE2 *in)\n{\n    if (vec_is_zero(in->Z, sizeof(in->Z))) {\n        bytes_zero(out, 96);\n        out[0] = 0xc0;    /* compressed and infinity bits */\n    } else {\n        limb_t sign = POINTonE2_Compress_BE(out, in);\n        out[0] |= (unsigned char)(0x80 | ((sign & 2) << 4));\n    }\n}\n\nstatic limb_t POINTonE2_Uncompress_BE(POINTonE2_affine 
*out,\n                                      const unsigned char in[96])\n{\n    POINTonE2_affine ret;\n    vec384 temp;\n\n    limbs_from_be_bytes(ret.X[1], in, sizeof(ret.X[1]));\n    limbs_from_be_bytes(ret.X[0], in + 48, sizeof(ret.X[0]));\n\n    /* clear top 3 bits in case caller was conveying some information there */\n    ret.X[1][sizeof(ret.X[1])/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3;\n    add_fp(temp, ret.X[1], ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X[1], sizeof(temp)))\n        return (limb_t)0 - BLST_BAD_ENCODING;\n\n    add_fp(temp, ret.X[0], ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X[0], sizeof(temp)))\n        return (limb_t)0 - BLST_BAD_ENCODING;\n\n    mul_fp(ret.X[0], ret.X[0], BLS12_381_RR);\n    mul_fp(ret.X[1], ret.X[1], BLS12_381_RR);\n\n    sqr_fp2(ret.Y, ret.X);\n    mul_fp2(ret.Y, ret.Y, ret.X);\n    add_fp2(ret.Y, ret.Y, B_E2);                        /* X^3 + B */\n    if (!sqrt_fp2(ret.Y, ret.Y))\n        return (limb_t)0 - BLST_POINT_NOT_ON_CURVE;\n\n    vec_copy(out, &ret, sizeof(ret));\n\n    return sgn0_pty_mont_384x(out->Y, BLS12_381_P, p0);\n}\n\nstatic BLST_ERROR POINTonE2_Uncompress_Z(POINTonE2_affine *out,\n                                         const unsigned char in[96])\n{\n    unsigned char in0 = in[0];\n    limb_t sgn0_pty;\n\n    if ((in0 & 0x80) == 0)      /* compressed bit */\n        return BLST_BAD_ENCODING;\n\n    if (in0 & 0x40) {           /* infinity bit */\n        if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 95)) {\n            vec_zero(out, sizeof(*out));\n            return BLST_SUCCESS;\n        } else {\n            return BLST_BAD_ENCODING;\n        }\n    }\n\n    sgn0_pty = POINTonE2_Uncompress_BE(out, in);\n\n    if (sgn0_pty > 3)\n        return (BLST_ERROR)(0 - sgn0_pty); /* POINT_NOT_ON_CURVE */\n\n    sgn0_pty >>= 1; /* skip over parity bit */\n    sgn0_pty ^= (in0 & 0x20) >> 5;\n    cneg_fp2(out->Y, out->Y, sgn0_pty);\n\n    return 
BLST_SUCCESS;\n}\n\nBLST_ERROR blst_p2_uncompress(POINTonE2_affine *out, const unsigned char in[96])\n{   return POINTonE2_Uncompress_Z(out, in);   }\n\nstatic BLST_ERROR POINTonE2_Deserialize_BE(POINTonE2_affine *out,\n                                           const unsigned char in[192])\n{\n    POINTonE2_affine ret;\n    vec384 temp;\n\n    limbs_from_be_bytes(ret.X[1], in, sizeof(ret.X[1]));\n    limbs_from_be_bytes(ret.X[0], in + 48, sizeof(ret.X[0]));\n    limbs_from_be_bytes(ret.Y[1], in + 96, sizeof(ret.Y[1]));\n    limbs_from_be_bytes(ret.Y[0], in + 144, sizeof(ret.Y[0]));\n\n    /* clear top 3 bits in case caller was conveying some information there */\n    ret.X[1][sizeof(ret.X[1])/sizeof(limb_t)-1] &= ((limb_t)0-1) >> 3;\n    add_fp(temp, ret.X[1], ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X[1], sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    add_fp(temp, ret.X[0], ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.X[0], sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    add_fp(temp, ret.Y[1], ZERO_384);  /* less than modulus? */\n    if (!vec_is_equal(temp, ret.Y[1], sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    add_fp(temp, ret.Y[0], ZERO_384);  /* less than modulus? 
*/\n    if (!vec_is_equal(temp, ret.Y[0], sizeof(temp)))\n        return BLST_BAD_ENCODING;\n\n    mul_fp(ret.X[0], ret.X[0], BLS12_381_RR);\n    mul_fp(ret.X[1], ret.X[1], BLS12_381_RR);\n    mul_fp(ret.Y[0], ret.Y[0], BLS12_381_RR);\n    mul_fp(ret.Y[1], ret.Y[1], BLS12_381_RR);\n\n    if (!POINTonE2_affine_on_curve(&ret))\n        return BLST_POINT_NOT_ON_CURVE;\n\n    vec_copy(out, &ret, sizeof(ret));\n\n    return BLST_SUCCESS;\n}\n\nstatic BLST_ERROR POINTonE2_Deserialize_Z(POINTonE2_affine *out,\n                                          const unsigned char in[192])\n{\n    unsigned char in0 = in[0];\n\n    if ((in0 & 0xe0) == 0)\n        return POINTonE2_Deserialize_BE(out, in);\n\n    if (in0 & 0x80)             /* compressed bit */\n        return POINTonE2_Uncompress_Z(out, in);\n\n    if (in0 & 0x40) {           /* infinity bit */\n        if (byte_is_zero(in0 & 0x3f) & bytes_are_zero(in+1, 191)) {\n            vec_zero(out, sizeof(*out));\n            return BLST_SUCCESS;\n        }\n    }\n\n    return BLST_BAD_ENCODING;\n}\n\nBLST_ERROR blst_p2_deserialize(POINTonE2_affine *out,\n                               const unsigned char in[192])\n{   return POINTonE2_Deserialize_Z(out, in);   }\n\n#include \"ec_ops.h\"\nPOINT_DADD_IMPL(POINTonE2, 384x, fp2)\nPOINT_DADD_AFFINE_IMPL_A0(POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\nPOINT_ADD_IMPL(POINTonE2, 384x, fp2)\nPOINT_ADD_AFFINE_IMPL(POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\nPOINT_DOUBLE_IMPL_A0(POINTonE2, 384x, fp2)\nPOINT_IS_EQUAL_IMPL(POINTonE2, 384x, fp2)\n\nvoid blst_p2_add(POINTonE2 *out, const POINTonE2 *a, const POINTonE2 *b)\n{   POINTonE2_add(out, a, b);   }\n\nvoid blst_p2_add_or_double(POINTonE2 *out, const POINTonE2 *a,\n                                           const POINTonE2 *b)\n{   POINTonE2_dadd(out, a, b, NULL);   }\n\nvoid blst_p2_add_affine(POINTonE2 *out, const POINTonE2 *a,\n                                        const POINTonE2_affine *b)\n{   POINTonE2_add_affine(out, a, b);   
}\n\nvoid blst_p2_add_or_double_affine(POINTonE2 *out, const POINTonE2 *a,\n                                                  const POINTonE2_affine *b)\n{   POINTonE2_dadd_affine(out, a, b);   }\n\nvoid blst_p2_double(POINTonE2 *out, const POINTonE2 *a)\n{   POINTonE2_double(out, a);   }\n\nint blst_p2_is_equal(const POINTonE2 *a, const POINTonE2 *b)\n{   return (int)POINTonE2_is_equal(a, b);   }\n\n#include \"ec_mult.h\"\nPOINT_MULT_SCALAR_WX_IMPL(POINTonE2, 4)\nPOINT_MULT_SCALAR_WX_IMPL(POINTonE2, 5)\n\n#ifdef __BLST_PRIVATE_TESTMODE__\nPOINT_AFFINE_MULT_SCALAR_IMPL(POINTonE2)\n\nDECLARE_PRIVATE_POINTXZ(POINTonE2, 384x)\nPOINT_LADDER_PRE_IMPL(POINTonE2, 384x, fp2)\nPOINT_LADDER_STEP_IMPL_A0(POINTonE2, 384x, fp2, onE2)\nPOINT_LADDER_POST_IMPL_A0(POINTonE2, 384x, fp2, onE2)\nPOINT_MULT_SCALAR_LADDER_IMPL(POINTonE2)\n#endif\n\nstatic void psi(POINTonE2 *out, const POINTonE2 *in)\n{\n    static const vec384x frobenius_x = { /* 1/(1 + i)^((P-1)/3) */\n      { 0 },\n      { /* (0x1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4\n              897d29650fb85f9b409427eb4f49fffd8bfd00000000aaad << 384) % P */\n        TO_LIMB_T(0x890dc9e4867545c3), TO_LIMB_T(0x2af322533285a5d5),\n        TO_LIMB_T(0x50880866309b7e2c), TO_LIMB_T(0xa20d1b8c7e881024),\n        TO_LIMB_T(0x14e4f04fe2db9068), TO_LIMB_T(0x14e56d3f1564853a) }\n    };\n    static const vec384x frobenius_y = { /* 1/(1 + i)^((P-1)/2) */\n      { /* (0x135203e60180a68ee2e9c448d77a2cd91c3dedd930b1cf60\n              ef396489f61eb45e304466cf3e67fa0af1ee7b04121bdea2 << 384) % P */\n        TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183),\n        TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18),\n        TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) },\n      { /* (0x06af0e0437ff400b6831e36d6bd17ffe48395dabc2d3435e\n              77f76e17009241c5ee67992f72ec05f4c81084fbede3cc09 << 384) % P */\n        TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),\n        
TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),\n        TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) },\n    };\n\n    vec_copy(out, in, sizeof(*out));\n    cneg_fp(out->X[1], out->X[1], 1);   mul_fp2(out->X, out->X, frobenius_x);\n    cneg_fp(out->Y[1], out->Y[1], 1);   mul_fp2(out->Y, out->Y, frobenius_y);\n    cneg_fp(out->Z[1], out->Z[1], 1);\n}\n\n/* Galbraith-Lin-Scott, ~67% faster than POINTonE2_mul_w5 */\nstatic void POINTonE2_mult_gls(POINTonE2 *out, const POINTonE2 *in,\n                               const pow256 SK)\n{\n    union { vec256 l; pow256 s; } val;\n\n    /* break down SK to \"digits\" with |z| as radix [in constant time] */\n\n    limbs_from_le_bytes(val.l, SK, 32);\n    div_by_zz(val.l);\n    div_by_z(val.l);\n    div_by_z(val.l + NLIMBS(256)/2);\n    le_bytes_from_limbs(val.s, val.l, 32);\n\n    {\n        const byte *scalars[2] = { val.s, NULL };\n        POINTonE2 table[4][1<<(5-1)];   /* 18KB */\n        size_t i;\n\n        POINTonE2_precompute_w5(table[0], in);\n        for (i = 0; i < 1<<(5-1); i++) {\n            psi(&table[1][i], &table[0][i]);\n            psi(&table[2][i], &table[1][i]);\n            psi(&table[3][i], &table[2][i]);\n            POINTonE2_cneg(&table[1][i], 1); /* account for z being negative */\n            POINTonE2_cneg(&table[3][i], 1);\n        }\n\n        POINTonE2s_mult_w5(out, NULL, 4, scalars, 64, table);\n    }\n\n    vec_zero(val.l, sizeof(val));   /* scrub the copy of SK */\n}\n\nstatic void POINTonE2_sign(POINTonE2 *out, const POINTonE2 *in, const pow256 SK)\n{\n    vec384x Z, ZZ;\n    limb_t inf;\n\n    POINTonE2_mult_gls(out, in, SK);\n\n    /* convert to affine to remove possible bias in out->Z */\n    inf = vec_is_zero(out->Z, sizeof(out->Z));\n#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION\n    flt_reciprocal_fp2(Z, out->Z);                      /* 1/Z   */\n#else\n    reciprocal_fp2(Z, out->Z);                          /* 1/Z   */\n#endif\n\n    sqr_fp2(ZZ, 
Z);\n    mul_fp2(out->X, out->X, ZZ);                        /* X = X/Z^2 */\n\n    mul_fp2(ZZ, ZZ, Z);\n    mul_fp2(out->Y, out->Y, ZZ);                        /* Y = Y/Z^3 */\n\n    vec_select(out->Z, out->Z, BLS12_381_G2.Z, sizeof(BLS12_381_G2.Z),\n                       inf);                            /* Z = inf ? 0 : 1 */\n}\n\nvoid blst_sk_to_pk_in_g2(POINTonE2 *out, const pow256 SK)\n{   POINTonE2_sign(out, &BLS12_381_G2, SK);   }\n\nvoid blst_sign_pk_in_g1(POINTonE2 *out, const POINTonE2 *msg, const pow256 SK)\n{   POINTonE2_sign(out, msg, SK);   }\n\nvoid blst_sk_to_pk2_in_g2(unsigned char out[192], POINTonE2_affine *PK,\n                          const pow256 SK)\n{\n    POINTonE2 P[1];\n\n    POINTonE2_sign(P, &BLS12_381_G2, SK);\n    if (PK != NULL)\n        vec_copy(PK, P, sizeof(*PK));\n    if (out != NULL) {\n        limb_t sgn0_pty = POINTonE2_Serialize_BE(out, P);\n        out[0] |= (sgn0_pty & 2) << 4;      /* pre-decorate */\n        out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6;\n    }\n}\n\nvoid blst_sign_pk2_in_g1(unsigned char out[192], POINTonE2_affine *sig,\n                         const POINTonE2 *hash, const pow256 SK)\n{\n    POINTonE2 P[1];\n\n    POINTonE2_sign(P, hash, SK);\n    if (sig != NULL)\n        vec_copy(sig, P, sizeof(*sig));\n    if (out != NULL) {\n        limb_t sgn0_pty = POINTonE2_Serialize_BE(out, P);\n        out[0] |= (sgn0_pty & 2) << 4;      /* pre-decorate */\n        out[0] |= vec_is_zero(P->Z, sizeof(P->Z)) << 6;\n    }\n}\n\nvoid blst_p2_mult(POINTonE2 *out, const POINTonE2 *a,\n                                  const byte *scalar, size_t nbits)\n{\n    if (nbits < 144) {\n        if (nbits)\n            POINTonE2_mult_w4(out, a, scalar, nbits);\n        else\n            vec_zero(out, sizeof(*out));\n    } else if (nbits <= 256) {\n        union { vec256 l; pow256 s; } val;\n        size_t i, j, top, mask = (size_t)0 - 1;\n\n        /* this is not about constant-time-ness, but branch optimization */\n       
 for (top = (nbits + 7)/8, i=0, j=0; i<sizeof(val.s);) {\n            val.s[i++] = scalar[j] & mask;\n            mask = 0 - ((i - top) >> (8*sizeof(top)-1));\n            j += 1 & mask;\n        }\n\n        if (check_mod_256(val.s, BLS12_381_r))  /* z^4 is the formal limit */\n            POINTonE2_mult_gls(out, a, val.s);\n        else    /* should never be the case, added for formal completeness */\n            POINTonE2_mult_w5(out, a, scalar, nbits);\n\n        vec_zero(val.l, sizeof(val));\n    } else {    /* should never be the case, added for formal completeness */\n        POINTonE2_mult_w5(out, a, scalar, nbits);\n    }\n}\n\nvoid blst_p2_unchecked_mult(POINTonE2 *out, const POINTonE2 *a,\n                                            const byte *scalar, size_t nbits)\n{\n    if (nbits)\n        POINTonE2_mult_w4(out, a, scalar, nbits);\n    else\n        vec_zero(out, sizeof(*out));\n}\n\nint blst_p2_affine_is_equal(const POINTonE2_affine *a,\n                            const POINTonE2_affine *b)\n{   return (int)vec_is_equal(a, b, sizeof(*a));   }\n\nint blst_p2_is_inf(const POINTonE2 *p)\n{   return (int)vec_is_zero(p->Z, sizeof(p->Z));   }\n\nconst POINTonE2 *blst_p2_generator(void)\n{   return &BLS12_381_G2;   }\n\nint blst_p2_affine_is_inf(const POINTonE2_affine *p)\n{   return (int)vec_is_zero(p, sizeof(*p));   }\n\nconst POINTonE2_affine *blst_p2_affine_generator(void)\n{   return (const POINTonE2_affine *)&BLS12_381_G2;   }\n\nsize_t blst_p2_sizeof(void)\n{   return sizeof(POINTonE2);   }\n\nsize_t blst_p2_affine_sizeof(void)\n{   return sizeof(POINTonE2_affine);   }\n"
  },
  {
    "path": "src/ec_mult.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_EC_MULT_H__\n#define __BLS12_381_ASM_EC_MULT_H__\n\n#include \"point.h\"\n\n/* Works up to 9 bits */\nstatic limb_t get_wval(const byte *d, size_t off, size_t bits)\n{\n    size_t top = off + bits - 1;\n    limb_t ret;\n\n    ret = ((limb_t)d[top / 8] << 8) | d[off / 8];\n\n    return ret >> (off%8);\n}\n\n/* Works up to 25 bits. */\nstatic limb_t get_wval_limb(const byte *d, size_t off, size_t bits)\n{\n    size_t i, top = (off + bits - 1)/8;\n    limb_t ret, mask = (limb_t)0 - 1;\n\n    d   += off/8;\n    top -= off/8-1;\n\n    /* this is not about constant-time-ness, but branch optimization */\n    for (ret=0, i=0; i<4;) {\n        ret |= (*d & mask) << (8*i);\n        mask = (limb_t)0 - ((++i - top) >> (8*sizeof(top)-1));\n        d += 1 & mask;\n    }\n\n    return ret >> (off%8);\n}\n\n/*\n * Window value encoding that utilizes the fact that -P is trivially\n * calculated, which allows to halve the size of pre-computed table,\n * is attributed to A. D. Booth, hence the name of the subroutines...\n */\nstatic limb_t booth_encode(limb_t wval, size_t sz)\n{\n    limb_t mask = 0 - (wval >> sz);     /* \"sign\" bit -> mask */\n    launder(mask);\n\n    wval = (wval + 1) >> 1;\n    wval = (wval ^ mask) - mask;\n\n    /* &0x1f, but <=0x10, is index in table, rest is extended \"sign\" bit */\n    return wval;\n}\n\n/*\n * Key feature of these constant-time subroutines is that they tolerate\n * zeros in most significant bit positions of the scalar[s], or in other\n * words, zero-padded scalar values. This means that one can and should\n * pass order's bit-length, which is customarily publicly known, instead\n * of the factual scalars' bit-lengths. This is facilitated by point\n * addition subroutines implemented to handle points at infinity, which\n * are encoded as Z==0. 
[Doubling algorithms handle such points at\n * infinity \"naturally,\" since resulting Z is product of original Z.]\n */\n#define POINT_MULT_SCALAR_WX_IMPL(ptype, SZ) \\\nstatic bool_t ptype##_gather_booth_w##SZ(ptype *restrict p, \\\n                                         const ptype table[1<<(SZ-1)], \\\n                                         limb_t booth_idx) \\\n{ \\\n    size_t i; \\\n    bool_t booth_sign = (booth_idx >> SZ) & 1; \\\n\\\n    booth_idx &= (1<<SZ) - 1; \\\n    vec_copy(p, table, sizeof(ptype)); \\\n    /* ~6% with -Os, ~2% with -O3 ... */\\\n    for (i = 2; i <= 1<<(SZ-1); i++) \\\n        ptype##_ccopy(p, table + i - 1, byte_is_zero((byte)(i ^ booth_idx))); \\\n\\\n    ptype##_cneg(p, booth_sign); \\\n\\\n    return byte_is_zero((byte)booth_idx); \\\n} \\\n\\\nstatic void ptype##_precompute_w##SZ(ptype row[], const ptype *point) \\\n{ \\\n    size_t i, j; \\\n                                      /* row[-1] is implicit infinity */\\\n    vec_copy(&row[0], point, sizeof(ptype));        /* row[0]=p*1     */\\\n    ptype##_double(&row[1],  point);                /* row[1]=p*(1+1) */\\\n    for (i = 2, j = 1; i < 1<<(SZ-1); i += 2, j++) \\\n        ptype##_add(&row[i], &row[j], &row[j-1]),   /* row[2]=p*(2+1) */\\\n        ptype##_double(&row[i+1], &row[j]);         /* row[3]=p*(2+2) */\\\n}                                                   /* row[4] ...     
*/\\\n\\\nstatic void ptype##s_mult_w##SZ(ptype *ret, \\\n                                const ptype *points[], size_t npoints, \\\n                                const byte *scalars[], size_t bits, \\\n                                ptype table[][1<<(SZ-1)]) \\\n{ \\\n    limb_t wmask, wval; \\\n    size_t i, j, window, nbytes; \\\n    const byte *scalar, **scalar_s = scalars; \\\n    ptype sum[1], row[1]; \\\n    bool_t sum_is_inf, row_is_inf, ret_is_inf; \\\n\\\n    if (table == NULL) \\\n        table = (ptype (*)[1<<(SZ-1)])alloca((1<<(SZ-1)) * sizeof(ptype) * \\\n                                             npoints); \\\n\\\n    if (points != NULL) { \\\n        const ptype *point = NULL; \\\n        for (i = 0; i < npoints; i++) \\\n            point = *points ? *points++ : point+1, \\\n            ptype##_precompute_w##SZ(table[i], point); \\\n    } \\\n\\\n    nbytes = (bits + 7)/8; /* convert |bits| to bytes */ \\\n    scalar = *scalar_s++; \\\n\\\n    /* top excess bits modulo target window size */ \\\n    window = bits % SZ; /* yes, it may be zero */ \\\n    wmask = ((limb_t)1 << (window + 1)) - 1; \\\n\\\n    bits -= window; \\\n    if (bits > 0) \\\n        wval = get_wval(scalar, bits - 1, window + 1) & wmask; \\\n    else \\\n        wval = (scalar[0] << 1) & wmask; \\\n\\\n    wval = booth_encode(wval, SZ); \\\n    ret_is_inf = ptype##_gather_booth_w##SZ(ret, table[0], wval); \\\n\\\n    i = 1; \\\n    while (bits > 0) { \\\n        for (; i < npoints; i++) { \\\n            scalar = *scalar_s ? 
*scalar_s++ : scalar+nbytes; \\\n            wval = get_wval(scalar, bits - 1, window + 1) & wmask; \\\n            wval = booth_encode(wval, SZ); \\\n            row_is_inf = ptype##_gather_booth_w##SZ(row, table[i], wval); \\\n            ptype##_dadd(sum, ret, row, NULL); \\\n            ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \\\n            sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \\\n            ret_is_inf |= sum_is_inf; \\\n            row_is_inf |= sum_is_inf; \\\n            ptype##_ccopy(ret, row, ret_is_inf); \\\n            ret_is_inf &= row_is_inf; \\\n        } \\\n\\\n        for (j = 0; j < SZ; j++) \\\n            ptype##_double(ret, ret); \\\n\\\n        window = SZ; \\\n        wmask = ((limb_t)1 << (window + 1)) - 1; \\\n        bits -= window; \\\n        i = 0; scalar_s = scalars; \\\n    } \\\n\\\n    for (; i < npoints; i++) { \\\n        scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \\\n        wval = (scalar[0] << 1) & wmask; \\\n        wval = booth_encode(wval, SZ); \\\n        row_is_inf = ptype##_gather_booth_w##SZ(row, table[i], wval); \\\n        ptype##_dadd(sum, ret, row, NULL); \\\n        ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \\\n        sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \\\n        ret_is_inf |= sum_is_inf; \\\n        row_is_inf |= sum_is_inf; \\\n        ptype##_ccopy(ret, row, ret_is_inf); \\\n        ret_is_inf &= row_is_inf; \\\n    } \\\n\\\n    vec_czero(ret->Z, sizeof(ret->Z), ret_is_inf); \\\n} \\\n\\\nstatic void ptype##_mult_w##SZ(ptype *ret, const ptype *point, \\\n                               const byte *scalar, size_t bits) \\\n{ \\\n    limb_t wmask, wval; \\\n    size_t j, window; \\\n    ptype sum[1], row[1]; \\\n    bool_t sum_is_inf, row_is_inf, ret_is_inf; \\\n    ptype table[1<<(SZ-1)]; \\\n\\\n    ptype##_precompute_w##SZ(table, point); \\\n\\\n    /* top excess bits modulo target window size */ \\\n    window = bits % SZ;  /* yes, it may be 
zero */ \\\n    wmask = ((limb_t)1 << (window + 1)) - 1; \\\n\\\n    bits -= window; \\\n    wval = bits ? get_wval(scalar, bits - 1, window + 1) \\\n                : (limb_t)scalar[0] << 1; \\\n    wval &= wmask; \\\n    wval = booth_encode(wval, SZ); \\\n    ret_is_inf = ptype##_gather_booth_w##SZ(ret, table, wval); \\\n\\\n    while (bits > 0) { \\\n        for (j = 0; j < SZ; j++) \\\n            ptype##_double(ret, ret); \\\n\\\n        window = SZ; \\\n        wmask = ((limb_t)1 << (window + 1)) - 1; \\\n        bits -= window; \\\n\\\n        wval = bits ? get_wval(scalar, bits - 1, window + 1) \\\n                    : (limb_t)scalar[0] << 1; \\\n        wval &= wmask; \\\n        wval = booth_encode(wval, SZ); \\\n        row_is_inf = ptype##_gather_booth_w##SZ(row, table, wval); \\\n        ptype##_dadd(sum, ret, row, NULL); \\\n        ptype##_ccopy(ret, sum, (ret_is_inf | row_is_inf) ^ 1); \\\n        sum_is_inf = vec_is_zero(ret->Z, sizeof(ret->Z)); \\\n        ret_is_inf |= sum_is_inf; \\\n        row_is_inf |= sum_is_inf; \\\n        ptype##_ccopy(ret, row, ret_is_inf); \\\n        ret_is_inf &= row_is_inf; \\\n    } \\\n\\\n    vec_czero(ret->Z, sizeof(ret->Z), ret_is_inf); \\\n}\n\n#if 0\n/* ~50%, or ~2x[!] slower than w5... */\n#define POINT_MULT_SCALAR_LADDER_IMPL(ptype) \\\nstatic void ptype##_mult_ladder(ptype *ret, const ptype *p, \\\n                                const byte *scalar, size_t bits) \\\n{ \\\n    ptype sum[1]; \\\n    bool_t bit, pbit = 0; \\\n\\\n    vec_copy(sum, p, sizeof(ptype)); \\\n    vec_zero(ret, sizeof(ptype));   /* infinity */ \\\n\\\n    while (bits--) { \\\n        bit = is_bit_set(scalar, bits); \\\n        bit ^= pbit; \\\n        ptype##_cswap(ret, sum, bit); \\\n        ptype##_add(sum, sum, ret); \\\n        ptype##_double(ret, ret); \\\n        pbit ^= bit; \\\n    } \\\n    ptype##_cswap(ret, sum, pbit); \\\n}\n#else\n/* >40% better performance than above, [and ~30% slower than w5]... 
*/\n#define POINT_MULT_SCALAR_LADDER_IMPL(ptype) \\\nstatic void ptype##_mult_ladder(ptype *out, const ptype *p, \\\n                                const byte *scalar, size_t bits) \\\n{ \\\n    ptype##xz sum[1]; \\\n    ptype##xz pxz[1]; \\\n    ptype##xz ret[1]; \\\n    bool_t bit, pbit = 0; \\\n\\\n    ptype##xz_ladder_pre(pxz, p); \\\n    vec_copy(sum, pxz, sizeof(ptype##xz)); \\\n    vec_zero(ret, sizeof(ptype##xz));   /* infinity */ \\\n\\\n    while (bits--) { \\\n        bit = is_bit_set(scalar, bits); \\\n        bit ^= pbit; \\\n        ptype##xz_cswap(ret, sum, bit); \\\n        ptype##xz_ladder_step(ret, sum, pxz); \\\n        pbit ^= bit; \\\n    } \\\n    ptype##xz_cswap(ret, sum, pbit); \\\n    ptype##xz_ladder_post(out, ret, sum, pxz, p->Y); \\\n}\n#endif\n\n/*\n * Sole reason for existence of this implementation is that addition\n * with affine point renders a share of multiplications redundant by\n * virtue of Z==1. And since pre-defined generator point can be and\n * customarily is instantiated affine, it would be hardly appropriate\n * to pass on this opportunity. Though while it's faster than the\n * generic ladder implementation, by ~25%, it's not faster than XZ one\n * above, <15% slower. Just in case, it's faster than generic ladder\n * even if one accounts for prior conversion to affine coordinates,\n * so that choice [for resource-constrained case] is actually between\n * this plus said conversion and XZ ladder...\n *\n * To summarize, if ptype##_mult_w5 executed in one unit of time, then\n * - naive ptype##_mult_ladder would execute in ~2;\n * - XZ version above - in ~1.4;\n * - ptype##_affine_mult_ladder below - in ~1.65;\n * - [small-footprint ptype##_to_affine would run in ~0.18].\n *\n * Caveat lector, |p_affine|*(order+2) produces wrong result, because\n * addition doesn't handle doubling. Indeed, P*(order+1) is P and it\n * fails to add with itself producing infinity in last addition. 
But\n * as long as |scalar| is reduced modulo order, as it should be, it's\n * not a problem...\n */\n#define POINT_AFFINE_MULT_SCALAR_IMPL(ptype) \\\nstatic void ptype##_affine_mult_ladder(ptype *ret, \\\n                                       const ptype##_affine *p_affine, \\\n                                       const byte *scalar, size_t bits) \\\n{ \\\n    ptype sum[1]; \\\n    bool_t bit; \\\n\\\n    vec_zero(ret, sizeof(ptype));   /* infinity */ \\\n\\\n    while (bits--) { \\\n        ptype##_double(ret, ret); \\\n        ptype##_add_affine(sum, ret, p_affine); \\\n        bit = (scalar[bits / LIMB_T_BITS] >> (bits % LIMB_T_BITS)) & 1; \\\n        ptype##_ccopy(ret, sum, bit); \\\n    } \\\n}\n#endif\n"
  },
  {
    "path": "src/ec_ops.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_384_ASM_EC_OPS_H__\n#define __BLS12_384_ASM_EC_OPS_H__\n/*\n * Addition that can handle doubling [as well as points at infinity,\n * which are encoded as Z==0] in constant time. It naturally comes at\n * cost, but this subroutine should be called only when independent\n * points are processed, which is considered reasonable compromise.\n * For example, ptype##s_mult_w5 calls it, but since *major* gain is\n * result of pure doublings being effectively divided by amount of\n * points, slightly slower addition can be tolerated. But what is the\n * additional cost more specifically? Best addition result is 11M+5S,\n * while this routine takes 13M+5S (+1M+1S if a4!=0), as per\n *\n * -------------+-------------\n * addition     | doubling\n * -------------+-------------\n * U1 = X1*Z2^2 | U1 = X1\n * U2 = X2*Z1^2 |\n * S1 = Y1*Z2^3 | S1 = Y1\n * S2 = Y2*Z1^3 |\n * zz = Z1*Z2   | zz = Z1\n * H = U2-U1    | H' = 2*Y1\n * R = S2-S1    | R' = 3*X1^2[+a*Z1^4]\n * sx = U1+U2   | sx = X1+X1\n * -------------+-------------\n * H!=0 || R!=0 | H==0 && R==0\n *\n *      X3 = R^2-H^2*sx\n *      Y3 = R*(H^2*U1-X3)-H^3*S1\n *      Z3 = H*zz\n *\n * As for R!=0 condition in context of H==0, a.k.a. P-P. 
The result is\n * infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0.\n */\n#define POINT_DADD_IMPL(ptype, bits, field) \\\nstatic void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2, \\\n                         const vec##bits a4) \\\n{ \\\n    ptype p3; /* starts as (U1, S1, zz) from addition side */\\\n    struct { vec##bits H, R, sx; } add, dbl; \\\n    bool_t p1inf, p2inf, is_dbl; \\\n\\\n    add_##field(dbl.sx, p1->X, p1->X);  /* sx = X1+X1 */\\\n    sqr_##field(dbl.R, p1->X);          /* X1^2 */\\\n    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X1^2 */\\\n    add_##field(dbl.H, p1->Y, p1->Y);   /* H = 2*Y1 */\\\n\\\n    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \\\n    sqr_##field(p3.X, p2->Z);           /* Z2^2 */\\\n    mul_##field(p3.Z, p1->Z, p2->Z);    /* Z1*Z2 */\\\n    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \\\n    sqr_##field(add.H, p1->Z);          /* Z1^2 */\\\n\\\n    if (a4 != NULL) { \\\n        sqr_##field(p3.Y, add.H);       /* Z1^4, [borrow p3.Y] */\\\n        mul_##field(p3.Y, p3.Y, a4);    \\\n        add_##field(dbl.R, dbl.R, p3.Y);/* R = 3*X1^2+a*Z1^4 */\\\n    } \\\n\\\n    mul_##field(p3.Y, p1->Y, p2->Z);    \\\n    mul_##field(p3.Y, p3.Y, p3.X);      /* S1 = Y1*Z2^3 */\\\n    mul_##field(add.R, p2->Y, p1->Z);   \\\n    mul_##field(add.R, add.R, add.H);   /* S2 = Y2*Z1^3 */\\\n    sub_##field(add.R, add.R, p3.Y);    /* R = S2-S1 */\\\n\\\n    mul_##field(p3.X, p3.X, p1->X);     /* U1 = X1*Z2^2 */\\\n    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\\\n\\\n    add_##field(add.sx, add.H, p3.X);   /* sx = U1+U2 */\\\n    sub_##field(add.H, add.H, p3.X);    /* H = U2-U1 */\\\n\\\n    /* make the choice between addition and doubling */\\\n    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));      \\\n    vec_select(&p3, p1, &p3, sizeof(p3), is_dbl);      \\\n    vec_select(&add, &dbl, &add, sizeof(add), is_dbl); \\\n    /* |p3| and |add| hold all inputs now, |p3| will hold output */\\\n\\\n    mul_##field(p3.Z, 
p3.Z, add.H);     /* Z3 = H*Z1*Z2 */\\\n\\\n    sqr_##field(dbl.H, add.H);          /* H^2 */\\\n    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\\\n    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\\\n    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\\\n\\\n    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\\\n    sqr_##field(p3.X, add.R);           /* R^2 */\\\n    sub_##field(p3.X, p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\\\n\\\n    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\\\n    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\\\n    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\\\n\\\n    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \\\n    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \\\n}\n\n/*\n * Addition with affine point that can handle doubling [as well as\n * points at infinity, with |p1| being encoded as Z==0 and |p2| as\n * X,Y==0] in constant time. But at what additional cost? Best\n * addition result is 7M+4S, while this routine takes 8M+5S, as per\n *\n * -------------+-------------\n * addition     | doubling\n * -------------+-------------\n * U1 = X1      | U1 = X2\n * U2 = X2*Z1^2 |\n * S1 = Y1      | S1 = Y2\n * S2 = Y2*Z1^3 |\n * H = U2-X1    | H' = 2*Y2\n * R = S2-Y1    | R' = 3*X2^2[+a]\n * sx = X1+U2   | sx = X2+X2\n * zz = H*Z1    | zz = H'\n * -------------+-------------\n * H!=0 || R!=0 | H==0 && R==0\n *\n *      X3 = R^2-H^2*sx\n *      Y3 = R*(H^2*U1-X3)-H^3*S1\n *      Z3 = zz\n *\n * As for R!=0 condition in context of H==0, a.k.a. P-P. 
The result is\n * infinity by virtue of Z3 = (U2-U1)*zz = H*zz = 0*zz == 0.\n */\n#define POINT_DADD_AFFINE_IMPL_A0(ptype, bits, field, one) \\\nstatic void ptype##_dadd_affine(ptype *out, const ptype *p1, \\\n                                            const ptype##_affine *p2) \\\n{ \\\n    ptype p3; /* starts as (,, H*Z1) from addition side */\\\n    struct { vec##bits H, R, sx; } add, dbl; \\\n    bool_t p1inf, p2inf, is_dbl; \\\n\\\n    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \\\n    add_##field(dbl.sx, p2->X, p2->X);  /* sx = X2+X2 */\\\n    sqr_##field(dbl.R, p2->X);          /* X2^2 */\\\n    mul_by_3_##field(dbl.R, dbl.R);     /* R = 3*X2^2 */\\\n    add_##field(dbl.H, p2->Y, p2->Y);   /* H = 2*Y2 */\\\n\\\n    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \\\n    sqr_##field(add.H, p1->Z);          /* Z1^2 */\\\n    mul_##field(add.R, add.H, p1->Z);   /* Z1^3 */\\\n    mul_##field(add.R, add.R, p2->Y);   /* S2 = Y2*Z1^3 */\\\n    sub_##field(add.R, add.R, p1->Y);   /* R = S2-Y1 */\\\n\\\n    mul_##field(add.H, add.H, p2->X);   /* U2 = X2*Z1^2 */\\\n\\\n    add_##field(add.sx, add.H, p1->X);  /* sx = X1+U2 */\\\n    sub_##field(add.H, add.H, p1->X);   /* H = U2-X1 */\\\n\\\n    mul_##field(p3.Z, add.H, p1->Z);    /* Z3 = H*Z1 */\\\n\\\n    /* make the choice between addition and doubling */ \\\n    is_dbl = vec_is_zero(add.H, 2*sizeof(add.H));       \\\n    vec_select(p3.X, p2, p1, 2*sizeof(p3.X), is_dbl);   \\\n    vec_select(p3.Z, dbl.H, p3.Z, sizeof(p3.Z), is_dbl);\\\n    vec_select(&add, &dbl, &add, sizeof(add), is_dbl);  \\\n    /* |p3| and |add| hold all inputs now, |p3| will hold output */\\\n\\\n    sqr_##field(dbl.H, add.H);          /* H^2 */\\\n    mul_##field(dbl.R, dbl.H, add.H);   /* H^3 */\\\n    mul_##field(dbl.R, dbl.R, p3.Y);    /* H^3*S1 */\\\n    mul_##field(p3.Y, dbl.H, p3.X);     /* H^2*U1 */\\\n\\\n    mul_##field(dbl.H, dbl.H, add.sx);  /* H^2*sx */\\\n    sqr_##field(p3.X, add.R);           /* R^2 */\\\n    sub_##field(p3.X, 
p3.X, dbl.H);     /* X3 = R^2-H^2*sx */\\\n\\\n    sub_##field(p3.Y, p3.Y, p3.X);      /* H^2*U1-X3 */\\\n    mul_##field(p3.Y, p3.Y, add.R);     /* R*(H^2*U1-X3) */\\\n    sub_##field(p3.Y, p3.Y, dbl.R);     /* Y3 = R*(H^2*U1-X3)-H^3*S1 */\\\n\\\n    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \\\n    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \\\n    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \\\n}\n\n/*\n * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl\n * with twist to handle either input at infinity, which are encoded as Z==0.\n */\n#define POINT_ADD_IMPL(ptype, bits, field) \\\nstatic void ptype##_add(ptype *out, const ptype *p1, const ptype *p2) \\\n{ \\\n    ptype p3; \\\n    vec##bits Z1Z1, Z2Z2, U1, S1, H, I, J; \\\n    bool_t p1inf, p2inf; \\\n\\\n    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \\\n    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\\\n\\\n    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\\\n    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\\\n\\\n    p2inf = vec_is_zero(p2->Z, sizeof(p2->Z)); \\\n    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\\\n\\\n    mul_##field(S1, Z2Z2, p2->Z);       /* Z2*Z2Z2 */\\\n    mul_##field(S1, S1, p1->Y);         /* S1 = Y1*Z2*Z2Z2 */\\\n\\\n    sub_##field(p3.Z, p3.Z, S1);        /* S2-S1 */\\\n    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-S1) */\\\n\\\n    mul_##field(U1, p1->X, Z2Z2);       /* U1 = X1*Z2Z2 */\\\n    mul_##field(H,  p2->X, Z1Z1);       /* U2 = X2*Z1Z1 */\\\n\\\n    sub_##field(H, H, U1);              /* H = U2-U1 */\\\n\\\n    add_##field(I, H, H);               /* 2*H */\\\n    sqr_##field(I, I);                  /* I = (2*H)^2 */\\\n\\\n    mul_##field(J, H, I);               /* J = H*I */\\\n    mul_##field(S1, S1, J);             /* S1*J */\\\n\\\n    mul_##field(p3.Y, U1, I);           /* V = U1*I */\\\n\\\n    sqr_##field(p3.X, p3.Z);            /* r^2 */\\\n    sub_##field(p3.X, 
p3.X, J);         /* r^2-J */\\\n    sub_##field(p3.X, p3.X, p3.Y);      \\\n    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\\\n\\\n    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\\\n    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\\\n    sub_##field(p3.Y, p3.Y, S1);        \\\n    sub_##field(p3.Y, p3.Y, S1);        /* Y3 = r*(V-X3)-2*S1*J */\\\n\\\n    add_##field(p3.Z, p1->Z, p2->Z);    /* Z1+Z2 */\\\n    sqr_##field(p3.Z, p3.Z);            /* (Z1+Z2)^2 */\\\n    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+Z2)^2-Z1Z1 */\\\n    sub_##field(p3.Z, p3.Z, Z2Z2);      /* (Z1+Z2)^2-Z1Z1-Z2Z2 */\\\n    mul_##field(p3.Z, p3.Z, H);         /* Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H */\\\n\\\n    vec_select(&p3, p1, &p3, sizeof(ptype), p2inf); \\\n    vec_select(out, p2, &p3, sizeof(ptype), p1inf); \\\n}\n\n/*\n * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl\n * with twist to handle either input at infinity, with |p1| encoded as Z==0,\n * and |p2| as X==Y==0.\n */\n#define POINT_ADD_AFFINE_IMPL(ptype, bits, field, one) \\\nstatic void ptype##_add_affine(ptype *out, const ptype *p1, \\\n                                           const ptype##_affine *p2) \\\n{ \\\n    ptype p3; \\\n    vec##bits Z1Z1, H, HH, I, J; \\\n    bool_t p1inf, p2inf; \\\n\\\n    p1inf = vec_is_zero(p1->Z, sizeof(p1->Z)); \\\n\\\n    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\\\n\\\n    mul_##field(p3.Z, Z1Z1, p1->Z);     /* Z1*Z1Z1 */\\\n    mul_##field(p3.Z, p3.Z, p2->Y);     /* S2 = Y2*Z1*Z1Z1 */\\\n\\\n    p2inf = vec_is_zero(p2->X, 2*sizeof(p2->X)); \\\n\\\n    mul_##field(H, p2->X, Z1Z1);        /* U2 = X2*Z1Z1 */\\\n    sub_##field(H, H, p1->X);           /* H = U2-X1 */\\\n\\\n    sqr_##field(HH, H);                 /* HH = H^2 */\\\n    add_##field(I, HH, HH);             \\\n    add_##field(I, I, I);               /* I = 4*HH */\\\n\\\n    mul_##field(p3.Y, p1->X, I);        /* V = X1*I */\\\n    mul_##field(J, H, I);              
 /* J = H*I */\\\n    mul_##field(I, J, p1->Y);           /* Y1*J */\\\n\\\n    sub_##field(p3.Z, p3.Z, p1->Y);     /* S2-Y1 */\\\n    add_##field(p3.Z, p3.Z, p3.Z);      /* r = 2*(S2-Y1) */\\\n\\\n    sqr_##field(p3.X, p3.Z);            /* r^2 */\\\n    sub_##field(p3.X, p3.X, J);         /* r^2-J */\\\n    sub_##field(p3.X, p3.X, p3.Y);      \\\n    sub_##field(p3.X, p3.X, p3.Y);      /* X3 = r^2-J-2*V */\\\n\\\n    sub_##field(p3.Y, p3.Y, p3.X);      /* V-X3 */\\\n    mul_##field(p3.Y, p3.Y, p3.Z);      /* r*(V-X3) */\\\n    sub_##field(p3.Y, p3.Y, I);         \\\n    sub_##field(p3.Y, p3.Y, I);         /* Y3 = r*(V-X3)-2*Y1*J */\\\n\\\n    add_##field(p3.Z, p1->Z, H);        /* Z1+H */\\\n    sqr_##field(p3.Z, p3.Z);            /* (Z1+H)^2 */\\\n    sub_##field(p3.Z, p3.Z, Z1Z1);      /* (Z1+H)^2-Z1Z1 */\\\n    sub_##field(p3.Z, p3.Z, HH);        /* Z3 = (Z1+H)^2-Z1Z1-HH */\\\n\\\n    vec_select(p3.Z, one, p3.Z, sizeof(p3.Z), p1inf); \\\n    vec_select(p3.X, p2,  p3.X, 2*sizeof(p3.X), p1inf); \\\n    vec_select(out, p1, &p3, sizeof(ptype), p2inf); \\\n}\n\n/*\n * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l\n */\n#define POINT_DOUBLE_IMPL_A0(ptype, bits, field) \\\nstatic void ptype##_double(ptype *p3, const ptype *p1) \\\n{ \\\n    vec##bits A, B, C; \\\n\\\n    sqr_##field(A, p1->X);              /* A = X1^2 */\\\n    sqr_##field(B, p1->Y);              /* B = Y1^2 */\\\n    sqr_##field(C, B);                  /* C = B^2 */\\\n\\\n    add_##field(B, B, p1->X);           /* X1+B */\\\n    sqr_##field(B, B);                  /* (X1+B)^2 */\\\n    sub_##field(B, B, A);               /* (X1+B)^2-A */\\\n    sub_##field(B, B, C);               /* (X1+B)^2-A-C */\\\n    add_##field(B, B, B);               /* D = 2*((X1+B)^2-A-C) */\\\n\\\n    mul_by_3_##field(A, A);             /* E = 3*A */\\\n\\\n    sqr_##field(p3->X, A);              /* F = E^2 */\\\n    sub_##field(p3->X, p3->X, B);       \\\n    sub_##field(p3->X, 
p3->X, B);       /* X3 = F-2*D */\\\n\\\n    add_##field(p3->Z, p1->Z, p1->Z);   /* 2*Z1 */\\\n    mul_##field(p3->Z, p3->Z, p1->Y);   /* Z3 = 2*Z1*Y1 */\\\n\\\n    mul_by_8_##field(C, C);             /* 8*C */\\\n    sub_##field(p3->Y, B, p3->X);       /* D-X3 */\\\n    mul_##field(p3->Y, p3->Y, A);       /* E*(D-X3) */\\\n    sub_##field(p3->Y, p3->Y, C);       /* Y3 = E*(D-X3)-8*C */\\\n}\n\n#define POINT_LADDER_PRE_IMPL(ptype, bits, field) \\\nstatic void ptype##xz_ladder_pre(ptype##xz *pxz, const ptype *p) \\\n{ \\\n    mul_##field(pxz->X, p->X, p->Z);    /* X2 = X1*Z1 */\\\n    sqr_##field(pxz->Z, p->Z);          \\\n    mul_##field(pxz->Z, pxz->Z, p->Z);  /* Z2 = Z1^3 */\\\n}\n\n/*\n * https://hyperelliptic.org/EFD/g1p/auto-shortw-xz.html#ladder-ladd-2002-it-3\n * with twist to handle either input at infinity, which are encoded as Z==0.\n * Just in case, order of doubling and addition is reverse in comparison to\n * hyperelliptic.org entry. This was done to minimize temporary storage.\n *\n * XZ1 is |p|, XZ2&XZ4 are in&out |r|, XZ3&XZ5 are in&out |s|.\n */\n#define POINT_LADDER_STEP_IMPL_A0(ptype, bits, field, suffix4b) \\\nstatic void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s, \\\n                                  const ptype##xz *p) \\\n{ \\\n    ptype##xz p5; \\\n    vec##bits A, B, C, D, XX, ZZ; \\\n    bool_t r_inf, s_inf; \\\n                                        /* s += r */\\\n    mul_##field(A, r->X, s->X);         /* A = X2*X3 */\\\n    mul_##field(B, r->Z, s->Z);         /* B = Z2*Z3 */\\\n    mul_##field(C, r->X, s->Z);         /* C = X2*Z3 */\\\n    mul_##field(D, r->Z, s->X);         /* D = X3*Z2 */\\\n\\\n    sqr_##field(A, A);                  /* (A[-a*B])^2 */\\\n    add_##field(p5.X, C, D);            /* C+D */\\\n    mul_##field(p5.X, p5.X, B);         /* B*(C+D) */\\\n    mul_by_4b_##suffix4b(B, p5.X);      /* b4*B*(C+D) */\\\n    sub_##field(p5.X, A, B);            /* (A[-a*B])^2-b4*B*(C+D) */\\\n    mul_##field(p5.X, p5.X, 
p->Z);      /* X5 = Z1*((A[-a*B])^2-b4*B*(C+D)) */\\\n\\\n    sub_##field(p5.Z, C, D);            /* C-D */\\\n    sqr_##field(p5.Z, p5.Z);            /* (C-D)^2 */\\\n    mul_##field(p5.Z, p5.Z, p->X);      /* Z5 = X1*(C-D)^2 */\\\n\\\n    r_inf = vec_is_zero(r->Z, sizeof(r->Z)); \\\n    s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \\\n\\\n    vec_select(&p5, r, &p5, sizeof(ptype##xz), s_inf); \\\n    vec_select(s,   s, &p5, sizeof(ptype##xz), r_inf); \\\n                                        /* r *= 2 */\\\n    sqr_##field(XX, r->X);              /* XX = X2^2 */\\\n    sqr_##field(ZZ, r->Z);              /* ZZ = Z2^2 */\\\n\\\n    add_##field(r->Z, r->X, r->Z);      /* X2+Z2 */\\\n    sqr_##field(r->Z, r->Z);            /* (X2+Z2)^2 */\\\n    sub_##field(r->Z, r->Z, XX);        /* (X2+Z2)^2-XX */\\\n    sub_##field(r->Z, r->Z, ZZ);        /* E = (X2+Z2)^2-XX-ZZ */\\\n\\\n    sqr_##field(A, XX);                 /* (XX[-a*ZZ])^2 */\\\n    mul_##field(B, r->Z, ZZ);           /* E*ZZ */\\\n    mul_by_4b_##suffix4b(C, B);         /* b4*E*ZZ */\\\n    sub_##field(r->X, A, C);            /* X4 = (XX[-a*ZZ])^2-b4*E*ZZ */\\\n\\\n    sqr_##field(ZZ, ZZ);                /* ZZ^2 */\\\n    mul_by_4b_##suffix4b(B, ZZ);        /* b4*ZZ^2 */\\\n    mul_##field(r->Z, r->Z, XX);        /* E*(XX[+a*ZZ]) */\\\n    add_##field(r->Z, r->Z, r->Z);      /* 2*E*(XX[+a*ZZ]) */\\\n    add_##field(r->Z, r->Z, B);         /* Z4 = 2*E*(XX[+a*ZZ])+b4*ZZ^2 */\\\n}\n\n/*\n * Recover the |r|'s y-coordinate using Eq. 
(8) from Brier-Joye,\n * \"Weierstraß Elliptic Curves and Side-Channel Attacks\", with XZ twist\n * and conversion to Jacobian coordinates from <openssl>/.../ecp_smpl.c,\n * and with twist to recover from |s| at infinity [which occurs when\n * multiplying by (order-1)].\n *\n * X4 = 2*Y1*X2*Z3*Z1*Z2\n * Y4 = 2*b*Z3*(Z1*Z2)^2 + Z3*(a*Z1*Z2+X1*X2)*(X1*Z2+X2*Z1) - X3*(X1*Z2-X2*Z1)^2\n * Z4 = 2*Y1*Z3*Z2^2*Z1\n *\n * Z3x2 = 2*Z3\n * Y1Z3x2 = Y1*Z3x2\n * Z1Z2 = Z1*Z2\n * X1Z2 = X1*Z2\n * X2Z1 = X2*Z1\n * X4 = Y1Z3x2*X2*Z1Z2\n * A = b*Z3x2*(Z1Z2)^2\n * B = Z3*(a*Z1Z2+X1*X2)*(X1Z2+X2Z1)\n * C = X3*(X1Z2-X2Z1)^2\n * Y4 = A+B-C\n * Z4 = Y1Z3x2*Z1Z2*Z2\n *\n * XZ1 is |p|, XZ2 is |r|, XZ3 is |s|, 'a' is 0.\n */\n#define POINT_LADDER_POST_IMPL_A0(ptype, bits, field, suffixb) \\\nstatic void ptype##xz_ladder_post(ptype *p4, \\\n                                  const ptype##xz *r, const ptype##xz *s, \\\n                                  const ptype##xz *p, const vec##bits Y1) \\\n{ \\\n    vec##bits Z3x2, Y1Z3x2, Z1Z2, X1Z2, X2Z1, A, B, C; \\\n    bool_t s_inf; \\\n\\\n    add_##field(Z3x2, s->Z, s->Z);      /* Z3x2 = 2*Z3 */\\\n    mul_##field(Y1Z3x2, Y1, Z3x2);      /* Y1Z3x2 = Y1*Z3x2 */\\\n    mul_##field(Z1Z2, p->Z, r->Z);      /* Z1Z2 = Z1*Z2 */\\\n    mul_##field(X1Z2, p->X, r->Z);      /* X1Z2 = X1*Z2 */\\\n    mul_##field(X2Z1, r->X, p->Z);      /* X2Z1 = X2*Z1 */\\\n\\\n    mul_##field(p4->X, Y1Z3x2, r->X);   /* Y1Z3x2*X2 */\\\n    mul_##field(p4->X, p4->X, Z1Z2);    /* X4 = Y1Z3x2*X2*Z1Z2 */\\\n\\\n    sqr_##field(A, Z1Z2);               /* (Z1Z2)^2 */\\\n    mul_##field(B, A, Z3x2);            /* Z3x2*(Z1Z2)^2 */\\\n    mul_by_b_##suffixb(A, B);           /* A = b*Z3x2*(Z1Z2)^2 */\\\n\\\n    mul_##field(B, p->X, r->X);         /* [a*Z1Z2+]X1*X2 */\\\n    mul_##field(B, B, s->Z);            /* Z3*([a*Z1Z2+]X1*X2) */\\\n    add_##field(C, X1Z2, X2Z1);         /* X1Z2+X2Z1 */\\\n    mul_##field(B, B, C);               /* B = Z3*([a*Z2Z1+]X1*X2)*(X1Z2+X2Z1) */\\\n\\\n   
 sub_##field(C, X1Z2, X2Z1);         /* X1Z2-X2Z1 */\\\n    sqr_##field(C, C);                  /* (X1Z2-X2Z1)^2 */\\\n    mul_##field(C, C, s->X);            /* C = X3*(X1Z2-X2Z1)^2 */\\\n\\\n    add_##field(A, A, B);               /* A+B */\\\n    sub_##field(A, A, C);               /* Y4 = A+B-C */\\\n\\\n    mul_##field(p4->Z, Z1Z2, r->Z);     /* Z1Z2*Z2 */\\\n    mul_##field(p4->Z, p4->Z, Y1Z3x2);  /* Y1Z3x2*Z1Z2*Z2 */\\\n\\\n    s_inf = vec_is_zero(s->Z, sizeof(s->Z)); \\\n    vec_select(p4->X, p->X, p4->X, sizeof(p4->X), s_inf); \\\n    vec_select(p4->Y, Y1,   A,     sizeof(p4->Y), s_inf); \\\n    vec_select(p4->Z, p->Z, p4->Z, sizeof(p4->Z), s_inf); \\\n    ptype##_cneg(p4, s_inf); \\\n                                        /* to Jacobian */\\\n    mul_##field(p4->X, p4->X, p4->Z);   /* X4 = X4*Z4 */\\\n    sqr_##field(B, p4->Z);              \\\n    mul_##field(p4->Y, p4->Y, B);       /* Y4 = Y4*Z4^2 */\\\n}\n\n#define POINT_IS_EQUAL_IMPL(ptype, bits, field) \\\nstatic limb_t ptype##_is_equal(const ptype *p1, const ptype *p2) \\\n{ \\\n    vec##bits Z1Z1, Z2Z2; \\\n    ptype##_affine a1, a2; \\\n    bool_t is_inf1 = vec_is_zero(p1->Z, sizeof(p1->Z)); \\\n    bool_t is_inf2 = vec_is_zero(p2->Z, sizeof(p2->Z)); \\\n\\\n    sqr_##field(Z1Z1, p1->Z);           /* Z1Z1 = Z1^2 */\\\n    sqr_##field(Z2Z2, p2->Z);           /* Z2Z2 = Z2^2 */\\\n\\\n    mul_##field(a1.X, p1->X, Z2Z2);     /* U1 = X1*Z2Z2 */\\\n    mul_##field(a2.X, p2->X, Z1Z1);     /* U2 = X2*Z1Z1 */\\\n\\\n    mul_##field(a1.Y, p1->Y, p2->Z);    /* Y1*Z2 */\\\n    mul_##field(a2.Y, p2->Y, p1->Z);    /* Y2*Z1 */\\\n\\\n    mul_##field(a1.Y, a1.Y, Z2Z2);      /* S1 = Y1*Z2*Z2Z2 */\\\n    mul_##field(a2.Y, a2.Y, Z1Z1);      /* S2 = Y2*Z1*Z1Z1 */\\\n\\\n    return vec_is_equal(&a1, &a2, sizeof(a1)) & (is_inf1 ^ is_inf2 ^ 1); \\\n}\n\n/*\n * https://eprint.iacr.org/2015/1060, algorithm 7 with a twist to handle\n * |p3| pointing at either |p1| or |p2|. 
This is resolved by adding |t5|\n * and replacing few first references to |X3| in the formula, up to step\n * 21, with it. 12M[+27A], doubling and infinity are handled by the\n * formula itself. Infinity is to be encoded as [0, !0, 0].\n */\n#define POINT_PROJ_DADD_IMPL_A0(ptype, bits, field, suffixb) \\\nstatic void ptype##proj_dadd(ptype##proj *p3, const ptype##proj *p1, \\\n                                              const ptype##proj *p2) \\\n{ \\\n    vec##bits t0, t1, t2, t3, t4, t5; \\\n\\\n    mul_##field(t0, p1->X, p2->X);      /* 1.     t0 = X1*X2 */\\\n    mul_##field(t1, p1->Y, p2->Y);      /* 2.     t1 = Y1*Y2 */\\\n    mul_##field(t2, p1->Z, p2->Z);      /* 3.     t2 = Z1*Z2 */\\\n    add_##field(t3, p1->X, p1->Y);      /* 4.     t3 = X1+Y1 */\\\n    add_##field(t4, p2->X, p2->Y);      /* 5.     t4 = X2+Y2 */\\\n    mul_##field(t3, t3, t4);            /* 6.     t3 = t3*t4 */\\\n    add_##field(t4, t0, t1);            /* 7.     t4 = t0+t1 */\\\n    sub_##field(t3, t3, t4);            /* 8.     t3 = t3-t4 */\\\n    add_##field(t4, p1->Y, p1->Z);      /* 9.     t4 = Y1+Z1 */\\\n    add_##field(t5, p2->Y, p2->Z);      /* 10.    t5 = Y2+Z2 */\\\n    mul_##field(t4, t4, t5);            /* 11.    t4 = t4*t5 */\\\n    add_##field(t5, t1, t2);            /* 12.    t5 = t1+t2 */\\\n    sub_##field(t4, t4, t5);            /* 13.    t4 = t4-t5 */\\\n    add_##field(t5, p1->X, p1->Z);      /* 14.    t5 = X1+Z1 */\\\n    add_##field(p3->Y, p2->X, p2->Z);   /* 15.    Y3 = X2+Z2 */\\\n    mul_##field(t5, t5, p3->Y);         /* 16.    t5 = t5*Y3 */\\\n    add_##field(p3->Y, t0, t2);         /* 17.    Y3 = t0+t2 */\\\n    sub_##field(p3->Y, t5, p3->Y);      /* 18.    Y3 = t5-Y3 */\\\n    mul_by_3_##field(t0, t0);           /* 19-20. t0 = 3*t0  */\\\n    mul_by_3_##field(t5, t2);           /* 21.    t5 = 3*t2  */\\\n    mul_by_b_##suffixb(t2, t5);         /* 21.    t2 = b*t5  */\\\n    add_##field(p3->Z, t1, t2);         /* 22.    
Z3 = t1+t2 */\\\n    sub_##field(t1, t1, t2);            /* 23.    t1 = t1-t2 */\\\n    mul_by_3_##field(t5, p3->Y);        /* 24.    t5 = 3*Y3  */\\\n    mul_by_b_##suffixb(p3->Y, t5);      /* 24.    Y3 = b*t5  */\\\n    mul_##field(p3->X, t4, p3->Y);      /* 25.    X3 = t4*Y3 */\\\n    mul_##field(t2, t3, t1);            /* 26.    t2 = t3*t1 */\\\n    sub_##field(p3->X, t2, p3->X);      /* 27.    X3 = t2-X3 */\\\n    mul_##field(p3->Y, p3->Y, t0);      /* 28.    Y3 = Y3*t0 */\\\n    mul_##field(t1, t1, p3->Z);         /* 29.    t1 = t1*Z3 */\\\n    add_##field(p3->Y, t1, p3->Y);      /* 30.    Y3 = t1+Y3 */\\\n    mul_##field(t0, t0, t3);            /* 31.    t0 = t0*t3 */\\\n    mul_##field(p3->Z, p3->Z, t4);      /* 32.    Z3 = Z3*t4 */\\\n    add_##field(p3->Z, p3->Z, t0);      /* 33.    Z3 = Z3+t0 */\\\n}\n\n/*\n * https://eprint.iacr.org/2015/1060, algorithm 8 with a twist to handle\n * |p2| being infinity encoded as [0, 0]. 11M[+21A].\n */\n#define POINT_PROJ_DADD_AFFINE_IMPL_A0(ptype, bits, field, suffixb) \\\nstatic void ptype##proj_dadd_affine(ptype##proj *out, const ptype##proj *p1, \\\n                                                      const ptype##_affine *p2) \\\n{ \\\n    ptype##proj p3[1]; \\\n    vec##bits t0, t1, t2, t3, t4; \\\n    limb_t p2inf = vec_is_zero(p2, sizeof(*p2)); \\\n\\\n    mul_##field(t0, p1->X, p2->X);      /* 1.     t0 = X1*X2 */\\\n    mul_##field(t1, p1->Y, p2->Y);      /* 2.     t1 = Y1*Y2 */\\\n    add_##field(t3, p1->X, p1->Y);      /* 3.     t3 = X1+Y1 */\\\n    add_##field(t4, p2->X, p2->Y);      /* 4.     t4 = X2+Y2 */\\\n    mul_##field(t3, t3, t4);            /* 5.     t3 = t3*t4 */\\\n    add_##field(t4, t0, t1);            /* 6.     t4 = t0+t1 */\\\n    sub_##field(t3, t3, t4);            /* 7.     t3 = t3-t4 */\\\n    mul_##field(t4, p2->Y, p1->Z);      /* 8.     t4 = Y2*Z1 */\\\n    add_##field(t4, t4, p1->Y);         /* 9.     t4 = t4+Y1 */\\\n    mul_##field(p3->Y, p2->X, p1->Z);   /* 10.    
Y3 = X2*Z1 */\\\n    add_##field(p3->Y, p3->Y, p1->X);   /* 11.    Y3 = Y3+X1 */\\\n    mul_by_3_##field(t0, t0);           /* 12-13. t0 = 3*t0  */\\\n    mul_by_b_##suffixb(t2, p1->Z);      /* 14.    t2 = b*Z1  */\\\n    mul_by_3_##field(t2, t2);           /* 14.    t2 = 3*t2  */\\\n    add_##field(p3->Z, t1, t2);         /* 15.    Z3 = t1+t2 */\\\n    sub_##field(t1, t1, t2);            /* 16.    t1 = t1-t2 */\\\n    mul_by_b_##suffixb(t2, p3->Y);      /* 17.    t2 = b*Y3  */\\\n    mul_by_3_##field(p3->Y, t2);        /* 17.    Y3 = 3*t2  */\\\n    mul_##field(p3->X, t4, p3->Y);      /* 18.    X3 = t4*Y3 */\\\n    mul_##field(t2, t3, t1);            /* 19.    t2 = t3*t1 */\\\n    sub_##field(p3->X, t2, p3->X);      /* 20.    X3 = t2-X3 */\\\n    mul_##field(p3->Y, p3->Y, t0);      /* 21.    Y3 = Y3*t0 */\\\n    mul_##field(t1, t1, p3->Z);         /* 22.    t1 = t1*Z3 */\\\n    add_##field(p3->Y, t1, p3->Y);      /* 23.    Y3 = t1+Y3 */\\\n    mul_##field(t0, t0, t3);            /* 24.    t0 = t0*t3 */\\\n    mul_##field(p3->Z, p3->Z, t4);      /* 25.    Z3 = Z3*t4 */\\\n    add_##field(p3->Z, p3->Z, t0);      /* 26.    Z3 = Z3+t0 */\\\n\\\n    vec_select(out, p1, p3, sizeof(*out), p2inf); \\\n}\n\n/*\n * https://eprint.iacr.org/2015/1060, algorithm 9 with a twist to handle\n * |p3| pointing at |p1|. This is resolved by adding |t3| to hold X*Y\n * and reordering operations to bring references to |p1| forward.\n * 6M+2S[+13A].\n */\n#define POINT_PROJ_DOUBLE_IMPL_A0(ptype, bits, field, suffixb) \\\nstatic void ptype##proj_double(ptype##proj *p3, const ptype##proj *p1) \\\n{ \\\n    vec##bits t0, t1, t2, t3; \\\n\\\n    sqr_##field(t0, p1->Y);             /* 1.     t0 = Y*Y   */\\\n    mul_##field(t1, p1->Y, p1->Z);      /* 5.     t1 = Y*Z   */\\\n    sqr_##field(t2, p1->Z);             /* 6.     t2 = Z*Z   */\\\n    mul_##field(t3, p1->X, p1->Y);      /* 16.    t3 = X*Y   */\\\n    lshift_##field(p3->Z, t0, 3);       /* 2-4.   
Z3 = 8*t0  */\\\n    mul_by_b_##suffixb(p3->X, t2);      /* 7.     t2 = b*t2  */\\\n    mul_by_3_##field(t2, p3->X);        /* 7.     t2 = 3*t2  */\\\n    mul_##field(p3->X, t2, p3->Z);      /* 8.     X3 = t2*Z3 */\\\n    add_##field(p3->Y, t0, t2);         /* 9.     Y3 = t0+t2 */\\\n    mul_##field(p3->Z, t1, p3->Z);      /* 10.    Z3 = t1*Z3 */\\\n    mul_by_3_##field(t2, t2);           /* 11-12. t2 = 3*t2  */\\\n    sub_##field(t0, t0, t2);            /* 13.    t0 = t0-t2 */\\\n    mul_##field(p3->Y, t0, p3->Y);      /* 14.    Y3 = t0*Y3 */\\\n    add_##field(p3->Y, p3->X, p3->Y);   /* 15.    Y3 = X3+Y3 */\\\n    mul_##field(p3->X, t0, t3);         /* 17.    X3 = t0*t3 */\\\n    add_##field(p3->X, p3->X, p3->X);   /* 18.    X3 = X3+X3 */\\\n}\n\n#define POINT_PROJ_TO_JACOBIAN_IMPL(ptype, bits, field) \\\nstatic void ptype##proj_to_Jacobian(ptype *out, const ptype##proj *in) \\\n{ \\\n    vec##bits ZZ; \\\n\\\n    sqr_##field(ZZ, in->Z); \\\n    mul_##field(out->X, in->X, in->Z); \\\n    mul_##field(out->Y, in->Y, ZZ); \\\n    vec_copy(out->Z, in->Z, sizeof(out->Z)); \\\n}\n\n#define POINT_TO_PROJECTIVE_IMPL(ptype, bits, field, one) \\\nstatic void ptype##_to_projective(ptype##proj *out, const ptype *in) \\\n{ \\\n    vec##bits ZZ; \\\n    limb_t is_inf = vec_is_zero(in->Z, sizeof(in->Z)); \\\n\\\n    sqr_##field(ZZ, in->Z); \\\n    mul_##field(out->X, in->X, in->Z); \\\n    vec_select(out->Y, one, in->Y, sizeof(out->Y), is_inf); \\\n    mul_##field(out->Z, ZZ, in->Z); \\\n}\n\n/******************* !!!!! NOT CONSTANT TIME !!!!! *******************/\n\n/*\n * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-add-2008-s\n * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-dbl-2008-s-1\n * with twist to handle either input at infinity. 
Addition costs 12M+2S,\n * while conditional doubling - 4M+6M+3S.\n */\n#define POINTXYZZ_DADD_IMPL(ptype, bits, field) \\\nstatic void ptype##xyzz_dadd(ptype##xyzz *p3, const ptype##xyzz *p1, \\\n                                              const ptype##xyzz *p2) \\\n{ \\\n    vec##bits U, S, P, R; \\\n\\\n    if (vec_is_zero(p2->ZZZ, 2*sizeof(p2->ZZZ))) { \\\n        vec_copy(p3, p1, sizeof(*p3));  \\\n        return; \\\n    } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \\\n        vec_copy(p3, p2, sizeof(*p3));  \\\n        return; \\\n    } \\\n\\\n    mul_##field(U, p1->X, p2->ZZ);              /* U1 = X1*ZZ2 */\\\n    mul_##field(S, p1->Y, p2->ZZZ);             /* S1 = Y1*ZZZ2 */\\\n    mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\\\n    mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\\\n    sub_##field(P, P, U);                       /* P = U2-U1 */\\\n    sub_##field(R, R, S);                       /* R = S2-S1 */\\\n\\\n    if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\\\n        vec##bits PP, PPP, Q;                   /* add |p1| and |p2| */\\\n\\\n        sqr_##field(PP, P);                     /* PP = P^2 */\\\n        mul_##field(PPP, PP, P);                /* PPP = P*PP */\\\n        mul_##field(Q, U, PP);                  /* Q = U1*PP */\\\n        sqr_##field(p3->X, R);                  /* R^2 */\\\n        add_##field(P, Q, Q); \\\n        sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\\\n        sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\\\n        sub_##field(Q, Q, p3->X); \\\n        mul_##field(Q, Q, R);                   /* R*(Q-X3) */\\\n        mul_##field(p3->Y, S, PPP);             /* S1*PPP */\\\n        sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-S1*PPP */\\\n        mul_##field(p3->ZZ, p1->ZZ, p2->ZZ);    /* ZZ1*ZZ2 */\\\n        mul_##field(p3->ZZZ, p1->ZZZ, p2->ZZZ); /* ZZZ1*ZZZ2 */\\\n        mul_##field(p3->ZZ, p3->ZZ, PP);        /* ZZ3 = 
ZZ1*ZZ2*PP */\\\n        mul_##field(p3->ZZZ, p3->ZZZ, PPP);     /* ZZZ3 = ZZZ1*ZZZ2*PPP */\\\n    } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\\\n        vec##bits V, W, M;                      /* double |p1| */\\\n\\\n        add_##field(U, p1->Y, p1->Y);           /* U = 2*Y1 */\\\n        sqr_##field(V, U);                      /* V = U^2 */\\\n        mul_##field(W, V, U);                   /* W = U*V */\\\n        mul_##field(S, p1->X, V);               /* S = X1*V */\\\n        sqr_##field(M, p1->X); \\\n        mul_by_3_##field(M, M);                 /* M = 3*X1^2[+a*ZZ1^2] */\\\n        sqr_##field(p3->X, M); \\\n        add_##field(U, S, S);                   /* 2*S */\\\n        sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\\\n        mul_##field(p3->Y, W, p1->Y);           /* W*Y1 */\\\n        sub_##field(S, S, p3->X); \\\n        mul_##field(S, S, M);                   /* M*(S-X3) */\\\n        sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y1 */\\\n        mul_##field(p3->ZZ, p1->ZZ, V);         /* ZZ3 = V*ZZ1 */\\\n        mul_##field(p3->ZZZ, p1->ZZZ, W);       /* ZZZ3 = W*ZZZ1 */\\\n    } else {                                    /* X1==X2 && Y1==-Y2 */\\\n        vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\\\n    } \\\n}\n\n/*\n * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#addition-madd-2008-s\n * http://hyperelliptic.org/EFD/g1p/auto-shortw-xyzz.html#doubling-mdbl-2008-s-1\n * with twists to handle even subtractions and either input at infinity.\n * Addition costs 8M+2S, while conditional doubling - 2M+4M+3S.\n */\n#define POINTXYZZ_DADD_AFFINE_IMPL(ptype, bits, field, one) \\\nstatic void ptype##xyzz_dadd_affine(ptype##xyzz *p3, const ptype##xyzz *p1, \\\n                                                     const ptype##_affine *p2, \\\n                                                     bool_t subtract) \\\n{ \\\n    vec##bits P, R; \\\n\\\n    if 
(vec_is_zero(p2, sizeof(*p2))) { \\\n        vec_copy(p3, p1, sizeof(*p3));  \\\n        return; \\\n    } else if (vec_is_zero(p1->ZZZ, 2*sizeof(p1->ZZZ))) { \\\n        vec_copy(p3->X, p2->X, 2*sizeof(p3->X));\\\n        cneg_##field(p3->ZZZ, one, subtract);   \\\n        vec_copy(p3->ZZ, one, sizeof(p3->ZZ));  \\\n        return; \\\n    } \\\n\\\n    mul_##field(P, p2->X, p1->ZZ);              /* U2 = X2*ZZ1 */\\\n    mul_##field(R, p2->Y, p1->ZZZ);             /* S2 = Y2*ZZZ1 */\\\n    cneg_##field(R, R, subtract); \\\n    sub_##field(P, P, p1->X);                   /* P = U2-X1 */\\\n    sub_##field(R, R, p1->Y);                   /* R = S2-Y1 */\\\n\\\n    if (!vec_is_zero(P, sizeof(P))) {           /* X1!=X2 */\\\n        vec##bits PP, PPP, Q;                   /* add |p2| to |p1| */\\\n\\\n        sqr_##field(PP, P);                     /* PP = P^2 */\\\n        mul_##field(PPP, PP, P);                /* PPP = P*PP */\\\n        mul_##field(Q, p1->X, PP);              /* Q = X1*PP */\\\n        sqr_##field(p3->X, R);                  /* R^2 */\\\n        add_##field(P, Q, Q); \\\n        sub_##field(p3->X, p3->X, PPP);         /* R^2-PPP */\\\n        sub_##field(p3->X, p3->X, P);           /* X3 = R^2-PPP-2*Q */\\\n        sub_##field(Q, Q, p3->X); \\\n        mul_##field(Q, Q, R);                   /* R*(Q-X3) */\\\n        mul_##field(p3->Y, p1->Y, PPP);         /* Y1*PPP */\\\n        sub_##field(p3->Y, Q, p3->Y);           /* Y3 = R*(Q-X3)-Y1*PPP */\\\n        mul_##field(p3->ZZ, p1->ZZ, PP);        /* ZZ3 = ZZ1*PP */\\\n        mul_##field(p3->ZZZ, p1->ZZZ, PPP);     /* ZZZ3 = ZZZ1*PPP */\\\n    } else if (vec_is_zero(R, sizeof(R))) {     /* X1==X2 && Y1==Y2 */\\\n        vec##bits U, S, M;                      /* double |p2| */\\\n\\\n        add_##field(U, p2->Y, p2->Y);           /* U = 2*Y1 */\\\n        sqr_##field(p3->ZZ, U);                 /* [ZZ3 =] V = U^2 */\\\n        mul_##field(p3->ZZZ, p3->ZZ, U);        /* [ZZZ3 =] W = U*V */\\\n      
  mul_##field(S, p2->X, p3->ZZ);          /* S = X1*V */\\\n        sqr_##field(M, p2->X); \\\n        mul_by_3_##field(M, M);                 /* M = 3*X1^2[+a] */\\\n        sqr_##field(p3->X, M); \\\n        add_##field(U, S, S);                   /* 2*S */\\\n        sub_##field(p3->X, p3->X, U);           /* X3 = M^2-2*S */\\\n        mul_##field(p3->Y, p3->ZZZ, p2->Y);     /* W*Y1 */\\\n        sub_##field(S, S, p3->X); \\\n        mul_##field(S, S, M);                   /* M*(S-X3) */\\\n        sub_##field(p3->Y, S, p3->Y);           /* Y3 = M*(S-X3)-W*Y1 */\\\n        cneg_##field(p3->ZZZ, p3->ZZZ, subtract); \\\n    } else {                                    /* X1==X2 && Y1==-Y2 */\\\n        vec_zero(p3->ZZZ, 2*sizeof(p3->ZZZ));   /* set |p3| to infinity */\\\n    } \\\n}\n\n#define POINTXYZZ_TO_JACOBIAN_IMPL(ptype, bits, field) \\\nstatic void ptype##xyzz_to_Jacobian(ptype *out, const ptype##xyzz *in) \\\n{ \\\n    mul_##field(out->X, in->X, in->ZZ); \\\n    mul_##field(out->Y, in->Y, in->ZZZ); \\\n    vec_copy(out->Z, in->ZZ, sizeof(out->Z)); \\\n}\n\n#define POINT_TO_XYZZ_IMPL(ptype, bits, field) \\\nstatic void ptype##_to_xyzz(ptype##xyzz *out, const ptype *in) \\\n{ \\\n    vec_copy(out->X, in->X, 2*sizeof(out->X)); \\\n    sqr_##field(out->ZZ, in->Z); \\\n    mul_##field(out->ZZZ, out->ZZ, in->Z); \\\n}\n\n#endif\n"
  },
  {
    "path": "src/errors.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_ERRORS_H__\n#define __BLS12_381_ASM_ERRORS_H__\n\ntypedef enum {\n    BLST_SUCCESS = 0,\n    BLST_BAD_ENCODING,\n    BLST_POINT_NOT_ON_CURVE,\n    BLST_POINT_NOT_IN_GROUP,\n    BLST_AGGR_TYPE_MISMATCH,\n    BLST_VERIFY_FAIL,\n    BLST_PK_IS_INFINITY,\n} BLST_ERROR;\n\n#endif\n"
  },
  {
    "path": "src/exp.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"vect.h\"\n\n/*\n * |out| = |inp|^|pow|, small footprint, public exponent\n */\nstatic void exp_mont_384(vec384 out, const vec384 inp, const byte *pow,\n                         size_t pow_bits, const vec384 p, limb_t n0)\n{\n#if 1\n    vec384 ret;\n\n    vec_copy(ret, inp, sizeof(ret));  /* ret = inp^1 */\n    --pow_bits; /* most significant bit is set, skip over */\n    while (pow_bits--) {\n        sqr_mont_384(ret, ret, p, n0);\n        if (is_bit_set(pow, pow_bits))\n            mul_mont_384(ret, ret, inp, p, n0);\n    }\n    vec_copy(out, ret, sizeof(ret));  /* out = ret */\n#else\n    unsigned int i;\n    vec384 sqr;\n\n    vec_copy(sqr, inp, sizeof(sqr));\n    for (i = 0; !is_bit_set(pow, i++);)\n        sqr_mont_384(sqr, sqr, sqr, p, n0);\n    vec_copy(out, sqr, sizeof(sqr));\n    for (; i < pow_bits; i++) {\n        sqr_mont_384(sqr, sqr, sqr, p, n0);\n        if (is_bit_set(pow, i))\n            mul_mont_384(out, out, sqr, p, n0);\n    }\n#endif\n}\n\nstatic void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow,\n                          size_t pow_bits, const vec384 p, limb_t n0)\n{\n    vec384x ret;\n\n    vec_copy(ret, inp, sizeof(ret));  /* |ret| = |inp|^1 */\n    --pow_bits; /* most significant bit is accounted for, skip over */\n    while (pow_bits--) {\n        sqr_mont_384x(ret, ret, p, n0);\n        if (is_bit_set(pow, pow_bits))\n            mul_mont_384x(ret, ret, inp, p, n0);\n    }\n    vec_copy(out, ret, sizeof(ret));  /* |out| = |ret| */\n}\n"
  },
  {
    "path": "src/exports.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n/*\n * Why this file? Overall goal is to ensure that all internal calls\n * remain internal after linking application. This is to both\n *\n * a) minimize possibility of external name conflicts (since all\n *    non-blst-prefixed and [assembly subroutines] remain static);\n * b) preclude possibility of unintentional internal reference\n *    overload in shared library context (one can achieve same\n *    effect with -Bsymbolic, but we don't want to rely on end-user\n *    to remember to use it);\n */\n\n#include \"fields.h\"\n#include \"bytes.h\"\n\n/*\n * BLS12-381-specific Fr shortcuts to assembly.\n */\nvoid blst_fr_add(vec256 ret, const vec256 a, const vec256 b)\n{   add_mod_256(ret, a, b, BLS12_381_r);   }\n\nvoid blst_fr_sub(vec256 ret, const vec256 a, const vec256 b)\n{   sub_mod_256(ret, a, b, BLS12_381_r);   }\n\nvoid blst_fr_mul_by_3(vec256 ret, const vec256 a)\n{   mul_by_3_mod_256(ret, a, BLS12_381_r);   }\n\nvoid blst_fr_lshift(vec256 ret, const vec256 a, size_t count)\n{   lshift_mod_256(ret, a, count, BLS12_381_r);   }\n\nvoid blst_fr_rshift(vec256 ret, const vec256 a, size_t count)\n{   rshift_mod_256(ret, a, count, BLS12_381_r);   }\n\nvoid blst_fr_mul(vec256 ret, const vec256 a, const vec256 b)\n{   mul_mont_sparse_256(ret, a, b, BLS12_381_r, r0);   }\n\nvoid blst_fr_ct_bfly(vec256 x0, vec256 x1, const vec256 twiddle)\n{\n    vec256 x2;\n\n    mul_mont_sparse_256(x2, x1, twiddle, BLS12_381_r, r0);\n    sub_mod_256(x1, x0, x2, BLS12_381_r);\n    add_mod_256(x0, x0, x2, BLS12_381_r);\n}\n\nvoid blst_fr_gs_bfly(vec256 x0, vec256 x1, const vec256 twiddle)\n{\n    vec256 x2;\n\n    sub_mod_256(x2, x0, x1, BLS12_381_r);\n    add_mod_256(x0, x0, x1, BLS12_381_r);\n    mul_mont_sparse_256(x1, x2, twiddle, BLS12_381_r, r0);\n}\n\nvoid blst_fr_sqr(vec256 ret, const vec256 a)\n{   
sqr_mont_sparse_256(ret, a, BLS12_381_r, r0);   }\n\nvoid blst_fr_cneg(vec256 ret, const vec256 a, int flag)\n{   cneg_mod_256(ret, a, is_zero(flag) ^ 1, BLS12_381_r);   }\n\nvoid blst_fr_to(vec256 ret, const vec256 a)\n{   mul_mont_sparse_256(ret, a, BLS12_381_rRR, BLS12_381_r, r0);   }\n\nvoid blst_fr_from(vec256 ret, const vec256 a)\n{   from_mont_256(ret, a, BLS12_381_r, r0);   }\n\nvoid blst_fr_from_scalar(vec256 ret, const pow256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if ((uptr_t)ret == (uptr_t)a && is_endian.little) {\n        mul_mont_sparse_256(ret, (const limb_t *)a, BLS12_381_rRR,\n                                                    BLS12_381_r, r0);\n    } else {\n        vec256 out;\n        limbs_from_le_bytes(out, a, 32);\n        mul_mont_sparse_256(ret, out, BLS12_381_rRR, BLS12_381_r, r0);\n        vec_zero(out, sizeof(out));\n    }\n}\n\nvoid blst_scalar_from_fr(pow256 ret, const vec256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if ((uptr_t)ret == (uptr_t)a && is_endian.little) {\n        from_mont_256((limb_t *)ret, a, BLS12_381_r, r0);\n    } else {\n        vec256 out;\n        from_mont_256(out, a, BLS12_381_r, r0);\n        le_bytes_from_limbs(ret, out, 32);\n        vec_zero(out, sizeof(out));\n    }\n}\n\nint blst_scalar_fr_check(const pow256 a)\n{   return (int)(check_mod_256(a, BLS12_381_r) |\n                 bytes_are_zero(a, sizeof(pow256)));\n}\n\nint blst_sk_check(const pow256 a)\n{   return (int)check_mod_256(a, BLS12_381_r);   }\n\nint blst_sk_add_n_check(pow256 ret, const pow256 a, const pow256 b)\n{   return (int)add_n_check_mod_256(ret, a, b, BLS12_381_r);   }\n\nint blst_sk_sub_n_check(pow256 ret, const pow256 a, const pow256 b)\n{   return (int)sub_n_check_mod_256(ret, a, b, BLS12_381_r);   }\n\nint blst_sk_mul_n_check(pow256 ret, const pow256 a, const pow256 b)\n{\n    vec256 t[2];\n    const union {\n        
long one;\n        char little;\n    } is_endian = { 1 };\n    bool_t is_zero;\n\n    if (((size_t)a|(size_t)b)%sizeof(limb_t) != 0 || !is_endian.little) {\n        limbs_from_le_bytes(t[0], a, sizeof(pow256));\n        limbs_from_le_bytes(t[1], b, sizeof(pow256));\n        a = (const byte *)t[0];\n        b = (const byte *)t[1];\n    }\n    mul_mont_sparse_256(t[0], BLS12_381_rRR, (const limb_t *)a, BLS12_381_r, r0);\n    mul_mont_sparse_256(t[0], t[0], (const limb_t *)b, BLS12_381_r, r0);\n    le_bytes_from_limbs(ret, t[0], sizeof(pow256));\n    is_zero = vec_is_zero(t[0], sizeof(vec256));\n    vec_zero(t, sizeof(t));\n\n    return (int)(is_zero^1);\n}\n\nvoid blst_sk_inverse(pow256 ret, const pow256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if (((size_t)a|(size_t)ret)%sizeof(limb_t) == 0 && is_endian.little) {\n        limb_t *out = (limb_t *)ret;\n        mul_mont_sparse_256(out, (const limb_t *)a, BLS12_381_rRR,\n                                                    BLS12_381_r, r0);\n        reciprocal_fr(out, out);\n        from_mont_256(out, out, BLS12_381_r, r0);\n    } else {\n        vec256 out;\n        limbs_from_le_bytes(out, a, 32);\n        mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);\n        reciprocal_fr(out, out);\n        from_mont_256(out, out, BLS12_381_r, r0);\n        le_bytes_from_limbs(ret, out, 32);\n        vec_zero(out, sizeof(out));\n    }\n}\n\n/*\n * BLS12-381-specific Fp shortcuts to assembly.\n */\nvoid blst_fp_add(vec384 ret, const vec384 a, const vec384 b)\n{   add_fp(ret, a, b);   }\n\nvoid blst_fp_sub(vec384 ret, const vec384 a, const vec384 b)\n{   sub_fp(ret, a, b);   }\n\nvoid blst_fp_mul_by_3(vec384 ret, const vec384 a)\n{   mul_by_3_fp(ret, a);   }\n\nvoid blst_fp_mul_by_8(vec384 ret, const vec384 a)\n{   mul_by_8_fp(ret, a);   }\n\nvoid blst_fp_lshift(vec384 ret, const vec384 a, size_t count)\n{   lshift_fp(ret, a, count);   }\n\nvoid 
blst_fp_mul(vec384 ret, const vec384 a, const vec384 b)\n{   mul_fp(ret, a, b);   }\n\nvoid blst_fp_sqr(vec384 ret, const vec384 a)\n{   sqr_fp(ret, a);   }\n\nvoid blst_fp_cneg(vec384 ret, const vec384 a, int flag)\n{   cneg_fp(ret, a, is_zero(flag) ^ 1);   }\n\nvoid blst_fp_to(vec384 ret, const vec384 a)\n{   mul_fp(ret, a, BLS12_381_RR);   }\n\nvoid blst_fp_from(vec384 ret, const vec384 a)\n{   from_fp(ret, a);   }\n\n/*\n * Fp serialization/deserialization.\n */\nvoid blst_fp_from_uint32(vec384 ret, const unsigned int a[12])\n{\n    if (sizeof(limb_t) == 8) {\n        int i;\n        for (i = 0; i < 6; i++)\n            ret[i] = a[2*i] | ((limb_t)a[2*i+1] << (32 & (8*sizeof(limb_t)-1)));\n        a = (const unsigned int *)ret;\n    }\n    mul_fp(ret, (const limb_t *)a, BLS12_381_RR);\n}\n\nvoid blst_uint32_from_fp(unsigned int ret[12], const vec384 a)\n{\n    if (sizeof(limb_t) == 4) {\n        from_fp((limb_t *)ret, a);\n    } else {\n        vec384 out;\n        int i;\n\n        from_fp(out, a);\n        for (i = 0; i < 6; i++) {\n            limb_t limb = out[i];\n            ret[2*i]   = (unsigned int)limb;\n            ret[2*i+1] = (unsigned int)(limb >> (32 & (8*sizeof(limb_t)-1)));\n        }\n    }\n}\n\nvoid blst_fp_from_uint64(vec384 ret, const unsigned long long a[6])\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if (sizeof(limb_t) == 4 && !is_endian.little) {\n        int i;\n        for (i = 0; i < 6; i++) {\n            unsigned long long limb = a[i];\n            ret[2*i]   = (limb_t)limb;\n            ret[2*i+1] = (limb_t)(limb >> 32);\n        }\n        a = (const unsigned long long *)ret;\n    }\n    mul_fp(ret, (const limb_t *)a, BLS12_381_RR);\n}\n\nvoid blst_uint64_from_fp(unsigned long long ret[6], const vec384 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if (sizeof(limb_t) == 8 || is_endian.little) {\n        from_fp((limb_t *)ret, 
a);\n    } else {\n        vec384 out;\n        int i;\n\n        from_fp(out, a);\n        for (i = 0; i < 6; i++)\n            ret[i] = out[2*i] | ((unsigned long long)out[2*i+1] << 32);\n    }\n}\n\nvoid blst_fp_from_bendian(vec384 ret, const unsigned char a[48])\n{\n    vec384 out;\n\n    limbs_from_be_bytes(out, a, sizeof(vec384));\n    mul_fp(ret, out, BLS12_381_RR);\n}\n\nvoid blst_bendian_from_fp(unsigned char ret[48], const vec384 a)\n{\n    vec384 out;\n\n    from_fp(out, a);\n    be_bytes_from_limbs(ret, out, sizeof(vec384));\n}\n\nvoid blst_fp_from_lendian(vec384 ret, const unsigned char a[48])\n{\n    vec384 out;\n\n    limbs_from_le_bytes(out, a, sizeof(vec384));\n    mul_fp(ret, out, BLS12_381_RR);\n}\n\nvoid blst_lendian_from_fp(unsigned char ret[48], const vec384 a)\n{\n    vec384 out;\n\n    from_fp(out, a);\n    le_bytes_from_limbs(ret, out, sizeof(vec384));\n}\n\n/*\n * BLS12-381-specific Fp2 shortcuts to assembly.\n */\nvoid blst_fp2_add(vec384x ret, const vec384x a, const vec384x b)\n{   add_fp2(ret, a, b);   }\n\nvoid blst_fp2_sub(vec384x ret, const vec384x a, const vec384x b)\n{   sub_fp2(ret, a, b);   }\n\nvoid blst_fp2_mul_by_3(vec384x ret, const vec384x a)\n{   mul_by_3_fp2(ret, a);   }\n\nvoid blst_fp2_mul_by_8(vec384x ret, const vec384x a)\n{   mul_by_8_fp2(ret, a);   }\n\nvoid blst_fp2_lshift(vec384x ret, const vec384x a, size_t count)\n{   lshift_fp2(ret, a, count);    }\n\nvoid blst_fp2_mul(vec384x ret, const vec384x a, const vec384x b)\n{   mul_fp2(ret, a, b);   }\n\nvoid blst_fp2_sqr(vec384x ret, const vec384x a)\n{   sqr_fp2(ret, a);   }\n\nvoid blst_fp2_cneg(vec384x ret, const vec384x a, int flag)\n{   cneg_fp2(ret, a, is_zero(flag) ^ 1);   }\n\n/*\n * Scalar serialization/deserialization.\n */\nvoid blst_scalar_from_uint32(pow256 ret, const unsigned int a[8])\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a && is_endian.little)\n        
return;\n\n    for(i = 0; i < 8; i++) {\n        unsigned int w = a[i];\n        *ret++ = (byte)w;\n        *ret++ = (byte)(w >> 8);\n        *ret++ = (byte)(w >> 16);\n        *ret++ = (byte)(w >> 24);\n    }\n}\n\nvoid blst_uint32_from_scalar(unsigned int ret[8], const pow256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a && is_endian.little)\n        return;\n\n    for(i = 0; i < 8; i++) {\n        unsigned int w = (unsigned int)(*a++);\n        w |= (unsigned int)(*a++) << 8;\n        w |= (unsigned int)(*a++) << 16;\n        w |= (unsigned int)(*a++) << 24;\n        ret[i] = w;\n    }\n}\n\nvoid blst_scalar_from_uint64(pow256 ret, const unsigned long long a[4])\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a && is_endian.little)\n        return;\n\n    for(i = 0; i < 4; i++) {\n        unsigned long long w = a[i];\n        *ret++ = (byte)w;\n        *ret++ = (byte)(w >> 8);\n        *ret++ = (byte)(w >> 16);\n        *ret++ = (byte)(w >> 24);\n        *ret++ = (byte)(w >> 32);\n        *ret++ = (byte)(w >> 40);\n        *ret++ = (byte)(w >> 48);\n        *ret++ = (byte)(w >> 56);\n    }\n}\n\nvoid blst_uint64_from_scalar(unsigned long long ret[4], const pow256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a && is_endian.little)\n        return;\n\n    for(i = 0; i < 4; i++) {\n        unsigned long long w = (unsigned long long)(*a++);\n        w |= (unsigned long long)(*a++) << 8;\n        w |= (unsigned long long)(*a++) << 16;\n        w |= (unsigned long long)(*a++) << 24;\n        w |= (unsigned long long)(*a++) << 32;\n        w |= (unsigned long long)(*a++) << 40;\n        w |= (unsigned long long)(*a++) << 48;\n        w |= (unsigned long long)(*a++) << 56;\n        ret[i] = w;\n    
}\n}\n\nvoid blst_scalar_from_bendian(pow256 ret, const unsigned char a[32])\n{\n    vec256 out;\n    limbs_from_be_bytes(out, a, sizeof(out));\n    le_bytes_from_limbs(ret, out, sizeof(out));\n    vec_zero(out, sizeof(out));\n}\n\nvoid blst_bendian_from_scalar(unsigned char ret[32], const pow256 a)\n{\n    vec256 out;\n    limbs_from_le_bytes(out, a, sizeof(out));\n    be_bytes_from_limbs(ret, out, sizeof(out));\n    vec_zero(out, sizeof(out));\n}\n\nvoid blst_scalar_from_lendian(pow256 ret, const unsigned char a[32])\n{\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a)\n        return;\n\n    for (i = 0; i < 32; i++)\n        ret[i] = a[i];\n}\n\nvoid blst_lendian_from_scalar(unsigned char ret[32], const pow256 a)\n{\n    size_t i;\n\n    if ((uptr_t)ret==(uptr_t)a)\n        return;\n\n    for (i = 0; i < 32; i++)\n        ret[i] = a[i];\n}\n\nvoid blst_fr_from_uint64(vec256 ret, const unsigned long long a[4])\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if (sizeof(limb_t) == 4 && !is_endian.little) {\n        int i;\n        for (i = 0; i < 4; i++) {\n            unsigned long long limb = a[i];\n            ret[2*i]   = (limb_t)limb;\n            ret[2*i+1] = (limb_t)(limb >> 32);\n        }\n        a = (const unsigned long long *)ret;\n    }\n    mul_mont_sparse_256(ret, (const limb_t *)a, BLS12_381_rRR, BLS12_381_r, r0);\n}\n\nvoid blst_uint64_from_fr(unsigned long long ret[4], const vec256 a)\n{\n    const union {\n        long one;\n        char little;\n    } is_endian = { 1 };\n\n    if (sizeof(limb_t) == 8 || is_endian.little) {\n        from_mont_256((limb_t *)ret, a, BLS12_381_r, r0);\n    } else {\n        vec256 out;\n        int i;\n\n        from_mont_256(out, a, BLS12_381_r, r0);\n        for (i = 0; i < 4; i++)\n            ret[i] = out[2*i] | ((unsigned long long)out[2*i+1] << 32);\n        vec_zero(out, sizeof(out));\n    }\n}\n\nint blst_scalar_from_le_bytes(pow256 out, const unsigned char 
*bytes, size_t n)\n{\n    size_t rem = n ? ((n - 1) % 32 + 1) : 0;\n    struct { vec256 out, digit; } t;\n    limb_t ret;\n\n    vec_zero(t.out, sizeof(t.out));\n\n    n -= rem;\n    limbs_from_le_bytes(t.out, bytes += n, rem);\n    mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0);\n\n    while (n) {\n        limbs_from_le_bytes(t.digit, bytes -= 32, 32);\n        add_mod_256(t.out, t.out, t.digit, BLS12_381_r);\n        mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0);\n        n -= 32;\n    }\n\n    from_mont_256(t.out, t.out, BLS12_381_r, r0);\n\n    ret = vec_is_zero(t.out, sizeof(t.out));\n    le_bytes_from_limbs(out, t.out, 32);\n    vec_zero(&t, sizeof(t));\n\n    return (int)(ret^1);\n}\n\nint blst_scalar_from_be_bytes(pow256 out, const unsigned char *bytes, size_t n)\n{\n    size_t rem = n ? ((n - 1) % 32 + 1) : 0;\n    struct { vec256 out, digit; } t;\n    limb_t ret;\n\n    vec_zero(t.out, sizeof(t.out));\n\n    limbs_from_be_bytes(t.out, bytes, rem);\n    mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0);\n\n    while (n -= rem) {\n        limbs_from_be_bytes(t.digit, bytes += rem, 32);\n        add_mod_256(t.out, t.out, t.digit, BLS12_381_r);\n        mul_mont_sparse_256(t.out, BLS12_381_rRR, t.out, BLS12_381_r, r0);\n        rem = 32;\n    }\n\n    from_mont_256(t.out, t.out, BLS12_381_r, r0);\n\n    ret = vec_is_zero(t.out, sizeof(t.out));\n    le_bytes_from_limbs(out, t.out, 32);\n    vec_zero(&t, sizeof(t));\n\n    return (int)(ret^1);\n}\n\nvoid blst_fp_from_le_bytes(vec384 out, const unsigned char *bytes, size_t n)\n{\n    size_t rem = n ? 
((n - 1) % 48 + 1) : 0;\n    vec384 digit;\n\n    vec_zero(out, sizeof(vec384));\n\n    n -= rem;\n    limbs_from_le_bytes(out, bytes += n, rem);\n    mul_mont_384(out, BLS12_381_RR, out, BLS12_381_P, p0);\n\n    while (n) {\n        limbs_from_le_bytes(digit, bytes -= 48, 48);\n        add_mod_384(out, out, digit, BLS12_381_P);\n        mul_mont_384(out, BLS12_381_RR, out, BLS12_381_P, p0);\n        n -= 48;\n    }\n}\n\nvoid blst_fp_from_be_bytes(vec384 out, const unsigned char *bytes, size_t n)\n{\n    size_t rem = n ? ((n - 1) % 48 + 1) : 0;\n    vec384 digit;\n\n    vec_zero(out, sizeof(vec384));\n\n    limbs_from_be_bytes(out, bytes, rem);\n    mul_mont_384(out, BLS12_381_RR, out, BLS12_381_P, p0);\n\n    while (n -= rem) {\n        limbs_from_be_bytes(digit, bytes += rem, 48);\n        add_mod_384(out, out, digit, BLS12_381_P);\n        mul_mont_384(out, BLS12_381_RR, out, BLS12_381_P, p0);\n        rem = 48;\n    }\n}\n\n/*\n * Single-shot SHA-256 hash function.\n */\n#include \"sha256.h\"\n\nvoid blst_sha256(unsigned char md[32], const void *msg, size_t len)\n{\n    SHA256_CTX ctx;\n\n    sha256_init(&ctx);\n    sha256_update(&ctx, msg, len);\n    sha256_final(md, &ctx);\n}\n\n/*\n * Test facilitator.\n */\nvoid blst_scalar_from_hexascii(pow256 ret, const char *hex)\n{   bytes_from_hexascii(ret, sizeof(pow256), hex);   }\n\nvoid blst_fr_from_hexascii(vec256 ret, const char *hex)\n{\n    limbs_from_hexascii(ret, sizeof(vec256), hex);\n    mul_mont_sparse_256(ret, ret, BLS12_381_rRR, BLS12_381_r, r0);\n}\n\nvoid blst_fp_from_hexascii(vec384 ret, const char *hex)\n{\n    limbs_from_hexascii(ret, sizeof(vec384), hex);\n    mul_fp(ret, ret, BLS12_381_RR);\n}\n"
  },
  {
    "path": "src/fields.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_FIELDS_H__\n#define __BLS12_381_ASM_FIELDS_H__\n\n#include \"vect.h\"\n#include \"consts.h\"\n\n/*\n * BLS12-381-specific Fp shortcuts to assembly.\n */\nstatic inline void add_fp(vec384 ret, const vec384 a, const vec384 b)\n{   add_mod_384(ret, a, b, BLS12_381_P);   }\n\nstatic inline void sub_fp(vec384 ret, const vec384 a, const vec384 b)\n{   sub_mod_384(ret, a, b, BLS12_381_P);   }\n\nstatic inline void mul_by_3_fp(vec384 ret, const vec384 a)\n{   mul_by_3_mod_384(ret, a, BLS12_381_P);   }\n\nstatic inline void mul_by_8_fp(vec384 ret, const vec384 a)\n{   mul_by_8_mod_384(ret, a, BLS12_381_P);   }\n\nstatic inline void lshift_fp(vec384 ret, const vec384 a, size_t count)\n{   lshift_mod_384(ret, a, count, BLS12_381_P);   }\n\nstatic inline void rshift_fp(vec384 ret, const vec384 a, size_t count)\n{   rshift_mod_384(ret, a, count, BLS12_381_P);   }\n\nstatic inline void div_by_2_fp(vec384 ret, const vec384 a)\n{   div_by_2_mod_384(ret, a, BLS12_381_P);   }\n\nstatic inline void mul_fp(vec384 ret, const vec384 a, const vec384 b)\n{   mul_mont_384(ret, a, b, BLS12_381_P, p0);   }\n\nstatic inline void sqr_fp(vec384 ret, const vec384 a)\n{   sqr_mont_384(ret, a, BLS12_381_P, p0);   }\n\nstatic inline void cneg_fp(vec384 ret, const vec384 a, bool_t flag)\n{   cneg_mod_384(ret, a, flag, BLS12_381_P);   }\n\nstatic inline void from_fp(vec384 ret, const vec384 a)\n{   from_mont_384(ret, a, BLS12_381_P, p0);   }\n\nstatic inline void redc_fp(vec384 ret, const vec768 a)\n{   redc_mont_384(ret, a, BLS12_381_P, p0);   }\n\n/*\n * BLS12-381-specific Fp2 shortcuts to assembly.\n */\nstatic inline void add_fp2(vec384x ret, const vec384x a, const vec384x b)\n{   add_mod_384x(ret, a, b, BLS12_381_P);   }\n\nstatic inline void sub_fp2(vec384x ret, const vec384x a, const vec384x b)\n{   
sub_mod_384x(ret, a, b, BLS12_381_P);   }\n\nstatic inline void mul_by_3_fp2(vec384x ret, const vec384x a)\n{   mul_by_3_mod_384x(ret, a, BLS12_381_P);   }\n\nstatic inline void mul_by_8_fp2(vec384x ret, const vec384x a)\n{   mul_by_8_mod_384x(ret, a, BLS12_381_P);   }\n\nstatic inline void lshift_fp2(vec384x ret, const vec384x a, size_t count)\n{\n    lshift_mod_384(ret[0], a[0], count, BLS12_381_P);\n    lshift_mod_384(ret[1], a[1], count, BLS12_381_P);\n}\n\nstatic inline void mul_fp2(vec384x ret, const vec384x a, const vec384x b)\n{   mul_mont_384x(ret, a, b, BLS12_381_P, p0);   }\n\nstatic inline void sqr_fp2(vec384x ret, const vec384x a)\n{   sqr_mont_384x(ret, a, BLS12_381_P, p0);   }\n\nstatic inline void cneg_fp2(vec384x ret, const vec384x a, bool_t flag)\n{\n    cneg_mod_384(ret[0], a[0], flag, BLS12_381_P);\n    cneg_mod_384(ret[1], a[1], flag, BLS12_381_P);\n}\n\n#define vec_load_global vec_copy\n\nstatic void reciprocal_fp(vec384 out, const vec384 inp);\nstatic void flt_reciprocal_fp(vec384 out, const vec384 inp);\nstatic bool_t recip_sqrt_fp(vec384 out, const vec384 inp);\nstatic bool_t sqrt_fp(vec384 out, const vec384 inp);\n\nstatic void reciprocal_fp2(vec384x out, const vec384x inp);\nstatic void flt_reciprocal_fp2(vec384x out, const vec384x inp);\nstatic bool_t recip_sqrt_fp2(vec384x out, const vec384x inp,\n                             const vec384x recip_ZZZ, const vec384x magic_ZZZ);\nstatic bool_t sqrt_fp2(vec384x out, const vec384x inp);\nstatic bool_t sqrt_align_fp2(vec384x out, const vec384x ret,\n                             const vec384x sqrt, const vec384x inp);\n\ntypedef vec384x   vec384fp2;\ntypedef vec384fp2 vec384fp6[3];\ntypedef vec384fp6 vec384fp12[2];\n\nstatic void sqr_fp12(vec384fp12 ret, const vec384fp12 a);\nstatic void cyclotomic_sqr_fp12(vec384fp12 ret, const vec384fp12 a);\nstatic void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b);\nstatic void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a,\n     
                                          const vec384fp6 xy00z0);\nstatic void conjugate_fp12(vec384fp12 a);\nstatic void inverse_fp12(vec384fp12 ret, const vec384fp12 a);\n/* caveat lector! |n| has to be non-zero and not more than 3! */\nstatic void frobenius_map_fp12(vec384fp12 ret, const vec384fp12 a, size_t n);\n\n#define neg_fp(r,a) cneg_fp((r),(a),1)\n#define neg_fp2(r,a) cneg_fp2((r),(a),1)\n\n#endif /* __BLS12_381_ASM_FIELDS_H__ */\n"
  },
  {
    "path": "src/fp12_tower.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n\n/*\n * Fp2  = Fp[u]  / (u^2 + 1)\n * Fp6  = Fp2[v] / (v^3 - u - 1)\n * Fp12 = Fp6[w] / (w^2 - v)\n */\n\nstatic inline void mul_by_u_plus_1_fp2(vec384x ret, const vec384x a)\n{   mul_by_1_plus_i_mod_384x(ret, a, BLS12_381_P);   }\n\n#if 1 && !defined(__BLST_NO_ASM__)\n#define __FP2x2__\n/*\n * Fp2x2 is a \"widened\" version of Fp2, which allows to consolidate\n * reductions from several multiplications. In other words instead of\n * \"mul_redc-mul_redc-add\" we get \"mul-mul-add-redc,\" where latter\n * addition is double-width... To be more specific this gives ~7-10%\n * faster pairing depending on platform...\n */\ntypedef vec768 vec768x[2];\n\nstatic inline void add_fp2x2(vec768x ret, const vec768x a, const vec768x b)\n{\n    add_mod_384x384(ret[0], a[0], b[0], BLS12_381_P);\n    add_mod_384x384(ret[1], a[1], b[1], BLS12_381_P);\n}\n\nstatic inline void sub_fp2x2(vec768x ret, const vec768x a, const vec768x b)\n{\n    sub_mod_384x384(ret[0], a[0], b[0], BLS12_381_P);\n    sub_mod_384x384(ret[1], a[1], b[1], BLS12_381_P);\n}\n\nstatic inline void mul_by_u_plus_1_fp2x2(vec768x ret, const vec768x a)\n{\n    /* caveat lector! 
|ret| may not be same as |a| */\n    sub_mod_384x384(ret[0], a[0], a[1], BLS12_381_P);\n    add_mod_384x384(ret[1], a[0], a[1], BLS12_381_P);\n}\n\nstatic inline void redc_fp2x2(vec384x ret, const vec768x a)\n{\n    redc_mont_384(ret[0], a[0], BLS12_381_P, p0);\n    redc_mont_384(ret[1], a[1], BLS12_381_P, p0);\n}\n\nstatic void mul_fp2x2(vec768x ret, const vec384x a, const vec384x b)\n{\n#if 1\n    mul_382x(ret, a, b, BLS12_381_P);   /* +~6% in Miller loop */\n#else\n    union { vec384 x[2]; vec768 x2; } t;\n\n    add_mod_384(t.x[0], a[0], a[1], BLS12_381_P);\n    add_mod_384(t.x[1], b[0], b[1], BLS12_381_P);\n    mul_384(ret[1], t.x[0], t.x[1]);\n\n    mul_384(ret[0], a[0], b[0]);\n    mul_384(t.x2,   a[1], b[1]);\n\n    sub_mod_384x384(ret[1], ret[1], ret[0], BLS12_381_P);\n    sub_mod_384x384(ret[1], ret[1], t.x2, BLS12_381_P);\n\n    sub_mod_384x384(ret[0], ret[0], t.x2, BLS12_381_P);\n#endif\n}\n\nstatic void sqr_fp2x2(vec768x ret, const vec384x a)\n{\n#if 1\n    sqr_382x(ret, a, BLS12_381_P);      /* +~5% in final exponentiation */\n#else\n    vec384 t0, t1;\n\n    add_mod_384(t0, a[0], a[1], BLS12_381_P);\n    sub_mod_384(t1, a[0], a[1], BLS12_381_P);\n\n    mul_384(ret[1], a[0], a[1]);\n    add_mod_384x384(ret[1], ret[1], ret[1], BLS12_381_P);\n\n    mul_384(ret[0], t0, t1);\n#endif\n}\n#endif  /* __FP2x2__ */\n\n/*\n * Fp6 extension\n */\n#if defined(__FP2x2__)  /* ~10-13% improvement for mul_fp12 and sqr_fp12 */\ntypedef vec768x vec768fp6[3];\n\nstatic inline void sub_fp6x2(vec768fp6 ret, const vec768fp6 a,\n                                            const vec768fp6 b)\n{\n    sub_fp2x2(ret[0], a[0], b[0]);\n    sub_fp2x2(ret[1], a[1], b[1]);\n    sub_fp2x2(ret[2], a[2], b[2]);\n}\n\nstatic void mul_fp6x2(vec768fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    vec768x t0, t1, t2;\n    vec384x aa, bb;\n\n    mul_fp2x2(t0, a[0], b[0]);\n    mul_fp2x2(t1, a[1], b[1]);\n    mul_fp2x2(t2, a[2], b[2]);\n\n    /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - 
a2*b2)*(u+1) + a0*b0\n              = (a1*b2 + a2*b1)*(u+1) + a0*b0 */\n    add_fp2(aa, a[1], a[2]);\n    add_fp2(bb, b[1], b[2]);\n    mul_fp2x2(ret[0], aa, bb);\n    sub_fp2x2(ret[0], ret[0], t1);\n    sub_fp2x2(ret[0], ret[0], t2);\n    mul_by_u_plus_1_fp2x2(ret[1], ret[0]);  /* borrow ret[1] for a moment */\n    add_fp2x2(ret[0], ret[1], t0);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1)\n              = a0*b1 + a1*b0 + a2*b2*(u+1) */\n    add_fp2(aa, a[0], a[1]);\n    add_fp2(bb, b[0], b[1]);\n    mul_fp2x2(ret[1], aa, bb);\n    sub_fp2x2(ret[1], ret[1], t0);\n    sub_fp2x2(ret[1], ret[1], t1);\n    mul_by_u_plus_1_fp2x2(ret[2], t2);      /* borrow ret[2] for a moment */\n    add_fp2x2(ret[1], ret[1], ret[2]);\n\n    /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1\n              = a0*b2 + a2*b0 + a1*b1 */\n    add_fp2(aa, a[0], a[2]);\n    add_fp2(bb, b[0], b[2]);\n    mul_fp2x2(ret[2], aa, bb);\n    sub_fp2x2(ret[2], ret[2], t0);\n    sub_fp2x2(ret[2], ret[2], t2);\n    add_fp2x2(ret[2], ret[2], t1);\n}\n\nstatic inline void redc_fp6x2(vec384fp6 ret, const vec768fp6 a)\n{\n    redc_fp2x2(ret[0], a[0]);\n    redc_fp2x2(ret[1], a[1]);\n    redc_fp2x2(ret[2], a[2]);\n}\n\nstatic void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    vec768fp6 r;\n\n    mul_fp6x2(r, a, b);\n    redc_fp6x2(ret, r); /* narrow to normal width */\n}\n\nstatic void sqr_fp6(vec384fp6 ret, const vec384fp6 a)\n{\n    vec768x s0, m01, m12, s2, rx;\n\n    sqr_fp2x2(s0, a[0]);\n\n    mul_fp2x2(m01, a[0], a[1]);\n    add_fp2x2(m01, m01, m01);\n\n    mul_fp2x2(m12, a[1], a[2]);\n    add_fp2x2(m12, m12, m12);\n\n    sqr_fp2x2(s2, a[2]);\n\n    /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2)\n              = a1^2 + 2*(a0*a2) */\n    add_fp2(ret[2], a[2], a[1]);\n    add_fp2(ret[2], ret[2], a[0]);\n    sqr_fp2x2(rx, ret[2]);\n    sub_fp2x2(rx, rx, s0);\n    sub_fp2x2(rx, rx, s2);\n    sub_fp2x2(rx, rx, m01);\n    sub_fp2x2(rx, 
rx, m12);\n    redc_fp2x2(ret[2], rx);\n\n    /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */\n    mul_by_u_plus_1_fp2x2(rx, m12);\n    add_fp2x2(rx, rx, s0);\n    redc_fp2x2(ret[0], rx);\n\n    /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */\n    mul_by_u_plus_1_fp2x2(rx, s2);\n    add_fp2x2(rx, rx, m01);\n    redc_fp2x2(ret[1], rx);\n}\n#else\nstatic void mul_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    vec384x t0, t1, t2, t3, t4, t5;\n\n    mul_fp2(t0, a[0], b[0]);\n    mul_fp2(t1, a[1], b[1]);\n    mul_fp2(t2, a[2], b[2]);\n\n    /* ret[0] = ((a1 + a2)*(b1 + b2) - a1*b1 - a2*b2)*(u+1) + a0*b0\n              = (a1*b2 + a2*b1)*(u+1) + a0*b0 */\n    add_fp2(t4, a[1], a[2]);\n    add_fp2(t5, b[1], b[2]);\n    mul_fp2(t3, t4, t5);\n    sub_fp2(t3, t3, t1);\n    sub_fp2(t3, t3, t2);\n    mul_by_u_plus_1_fp2(t3, t3);\n    /* add_fp2(ret[0], t3, t0); considering possible aliasing... */\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*b2*(u+1)\n              = a0*b1 + a1*b0 + a2*b2*(u+1) */\n    add_fp2(t4, a[0], a[1]);\n    add_fp2(t5, b[0], b[1]);\n    mul_fp2(ret[1], t4, t5);\n    sub_fp2(ret[1], ret[1], t0);\n    sub_fp2(ret[1], ret[1], t1);\n    mul_by_u_plus_1_fp2(t4, t2);\n    add_fp2(ret[1], ret[1], t4);\n\n    /* ret[2] = (a0 + a2)*(b0 + b2) - a0*b0 - a2*b2 + a1*b1\n              = a0*b2 + a2*b0 + a1*b1 */\n    add_fp2(t4, a[0], a[2]);\n    add_fp2(t5, b[0], b[2]);\n    mul_fp2(ret[2], t4, t5);\n    sub_fp2(ret[2], ret[2], t0);\n    sub_fp2(ret[2], ret[2], t2);\n    add_fp2(ret[2], ret[2], t1);\n\n    add_fp2(ret[0], t3, t0);    /* ... 
moved from above */\n}\n\nstatic void sqr_fp6(vec384fp6 ret, const vec384fp6 a)\n{\n    vec384x s0, m01, m12, s2;\n\n    sqr_fp2(s0, a[0]);\n\n    mul_fp2(m01, a[0], a[1]);\n    add_fp2(m01, m01, m01);\n\n    mul_fp2(m12, a[1], a[2]);\n    add_fp2(m12, m12, m12);\n\n    sqr_fp2(s2, a[2]);\n\n    /* ret[2] = (a0 + a1 + a2)^2 - a0^2 - a2^2 - 2*(a0*a1) - 2*(a1*a2)\n              = a1^2 + 2*(a0*a2) */\n    add_fp2(ret[2], a[2], a[1]);\n    add_fp2(ret[2], ret[2], a[0]);\n    sqr_fp2(ret[2], ret[2]);\n    sub_fp2(ret[2], ret[2], s0);\n    sub_fp2(ret[2], ret[2], s2);\n    sub_fp2(ret[2], ret[2], m01);\n    sub_fp2(ret[2], ret[2], m12);\n\n    /* ret[0] = a0^2 + 2*(a1*a2)*(u+1) */\n    mul_by_u_plus_1_fp2(ret[0], m12);\n    add_fp2(ret[0], ret[0], s0);\n\n    /* ret[1] = a2^2*(u+1) + 2*(a0*a1) */\n    mul_by_u_plus_1_fp2(ret[1], s2);\n    add_fp2(ret[1], ret[1], m01);\n}\n#endif\n\nstatic void add_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    add_fp2(ret[0], a[0], b[0]);\n    add_fp2(ret[1], a[1], b[1]);\n    add_fp2(ret[2], a[2], b[2]);\n}\n\nstatic void sub_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    sub_fp2(ret[0], a[0], b[0]);\n    sub_fp2(ret[1], a[1], b[1]);\n    sub_fp2(ret[2], a[2], b[2]);\n}\n\nstatic void neg_fp6(vec384fp6 ret, const vec384fp6 a)\n{\n    neg_fp2(ret[0], a[0]);\n    neg_fp2(ret[1], a[1]);\n    neg_fp2(ret[2], a[2]);\n}\n\n#if 0\n#define mul_by_v_fp6 mul_by_v_fp6\nstatic void mul_by_v_fp6(vec384fp6 ret, const vec384fp6 a)\n{\n    vec384x t;\n\n    mul_by_u_plus_1_fp2(t, a[2]);\n    vec_copy(ret[2], a[1], sizeof(a[1]));\n    vec_copy(ret[1], a[0], sizeof(a[0]));\n    vec_copy(ret[0], t, sizeof(t));\n}\n#endif\n\n/*\n * Fp12 extension\n */\n#if defined(__FP2x2__)\nstatic void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)\n{\n    vec768fp6 t0, t1, rx;\n    vec384fp6 t2;\n\n    mul_fp6x2(t0, a[0], b[0]);\n    mul_fp6x2(t1, a[1], b[1]);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1\n  
            = a0*b1 + a1*b0 */\n    add_fp6(t2, a[0], a[1]);\n    add_fp6(ret[1], b[0], b[1]);\n    mul_fp6x2(rx, ret[1], t2);\n    sub_fp6x2(rx, rx, t0);\n    sub_fp6x2(rx, rx, t1);\n    redc_fp6x2(ret[1], rx);\n\n    /* ret[0] = a0*b0 + a1*b1*v */\n    mul_by_u_plus_1_fp2x2(rx[0], t1[2]);\n    add_fp2x2(rx[0], t0[0], rx[0]);\n    add_fp2x2(rx[1], t0[1], t1[0]);\n    add_fp2x2(rx[2], t0[2], t1[1]);\n    redc_fp6x2(ret[0], rx);\n}\n\nstatic inline void mul_by_0y0_fp6x2(vec768fp6 ret, const vec384fp6 a,\n                                                   const vec384fp2 b)\n{\n    mul_fp2x2(ret[1], a[2], b);     /* borrow ret[1] for a moment */\n    mul_by_u_plus_1_fp2x2(ret[0], ret[1]);\n    mul_fp2x2(ret[1], a[0], b);\n    mul_fp2x2(ret[2], a[1], b);\n}\n\nstatic void mul_by_xy0_fp6x2(vec768fp6 ret, const vec384fp6 a,\n                                            const vec384fp6 b)\n{\n    vec768x t0, t1;\n    vec384x aa, bb;\n\n    mul_fp2x2(t0, a[0], b[0]);\n    mul_fp2x2(t1, a[1], b[1]);\n\n    /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0\n              = (a1*0 + a2*b1)*(u+1) + a0*b0 */\n    mul_fp2x2(ret[1], a[2], b[1]);  /* borrow ret[1] for a moment */\n    mul_by_u_plus_1_fp2x2(ret[0], ret[1]);\n    add_fp2x2(ret[0], ret[0], t0);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1)\n              = a0*b1 + a1*b0 + a2*0*(u+1) */\n    add_fp2(aa, a[0], a[1]);\n    add_fp2(bb, b[0], b[1]);\n    mul_fp2x2(ret[1], aa, bb);\n    sub_fp2x2(ret[1], ret[1], t0);\n    sub_fp2x2(ret[1], ret[1], t1);\n\n    /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1\n              = a0*0 + a2*b0 + a1*b1 */\n    mul_fp2x2(ret[2], a[2], b[0]);\n    add_fp2x2(ret[2], ret[2], t1);\n}\n\nstatic void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a,\n                                               const vec384fp6 xy00z0)\n{\n    vec768fp6 t0, t1, rr;\n    vec384fp6 t2;\n\n    mul_by_xy0_fp6x2(t0, a[0], xy00z0);\n    mul_by_0y0_fp6x2(t1, a[1], 
xy00z0[2]);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1\n              = a0*b1 + a1*b0 */\n    vec_copy(t2[0], xy00z0[0], sizeof(t2[0]));\n    add_fp2(t2[1], xy00z0[1], xy00z0[2]);\n    add_fp6(ret[1], a[0], a[1]);\n    mul_by_xy0_fp6x2(rr, ret[1], t2);\n    sub_fp6x2(rr, rr, t0);\n    sub_fp6x2(rr, rr, t1);\n    redc_fp6x2(ret[1], rr);\n\n    /* ret[0] = a0*b0 + a1*b1*v */\n    mul_by_u_plus_1_fp2x2(rr[0], t1[2]);\n    add_fp2x2(rr[0], t0[0], rr[0]);\n    add_fp2x2(rr[1], t0[1], t1[0]);\n    add_fp2x2(rr[2], t0[2], t1[1]);\n    redc_fp6x2(ret[0], rr);\n}\n#else\nstatic void mul_fp12(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)\n{\n    vec384fp6 t0, t1, t2;\n\n    mul_fp6(t0, a[0], b[0]);\n    mul_fp6(t1, a[1], b[1]);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1\n              = a0*b1 + a1*b0 */\n    add_fp6(t2, a[0], a[1]);\n    add_fp6(ret[1], b[0], b[1]);\n    mul_fp6(ret[1], ret[1], t2);\n    sub_fp6(ret[1], ret[1], t0);\n    sub_fp6(ret[1], ret[1], t1);\n\n    /* ret[0] = a0*b0 + a1*b1*v */\n#ifdef mul_by_v_fp6\n    mul_by_v_fp6(t1, t1);\n    add_fp6(ret[0], t0, t1);\n#else\n    mul_by_u_plus_1_fp2(t1[2], t1[2]);\n    add_fp2(ret[0][0], t0[0], t1[2]);\n    add_fp2(ret[0][1], t0[1], t1[0]);\n    add_fp2(ret[0][2], t0[2], t1[1]);\n#endif\n}\n\nstatic inline void mul_by_0y0_fp6(vec384fp6 ret, const vec384fp6 a,\n                                                 const vec384fp2 b)\n{\n    vec384x t;\n\n    mul_fp2(t,      a[2], b);\n    mul_fp2(ret[2], a[1], b);\n    mul_fp2(ret[1], a[0], b);\n    mul_by_u_plus_1_fp2(ret[0], t);\n}\n\nstatic void mul_by_xy0_fp6(vec384fp6 ret, const vec384fp6 a, const vec384fp6 b)\n{\n    vec384x t0, t1, /*t2,*/ t3, t4, t5;\n\n    mul_fp2(t0, a[0], b[0]);\n    mul_fp2(t1, a[1], b[1]);\n\n    /* ret[0] = ((a1 + a2)*(b1 + 0) - a1*b1 - a2*0)*(u+1) + a0*b0\n              = (a1*0 + a2*b1)*(u+1) + a0*b0 */\n    mul_fp2(t3, a[2], b[1]);\n    mul_by_u_plus_1_fp2(t3, t3);\n    /* add_fp2(ret[0], t3, t0); 
considering possible aliasing... */\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1 + a2*0*(u+1)\n              = a0*b1 + a1*b0 + a2*0*(u+1) */\n    add_fp2(t4, a[0], a[1]);\n    add_fp2(t5, b[0], b[1]);\n    mul_fp2(ret[1], t4, t5);\n    sub_fp2(ret[1], ret[1], t0);\n    sub_fp2(ret[1], ret[1], t1);\n\n    /* ret[2] = (a0 + a2)*(b0 + 0) - a0*b0 - a2*0 + a1*b1\n              = a0*0 + a2*b0 + a1*b1 */\n    mul_fp2(ret[2], a[2], b[0]);\n    add_fp2(ret[2], ret[2], t1);\n\n    add_fp2(ret[0], t3, t0);    /* ... moved from above */\n}\n\nstatic void mul_by_xy00z0_fp12(vec384fp12 ret, const vec384fp12 a,\n                                               const vec384fp6 xy00z0)\n{\n    vec384fp6 t0, t1, t2;\n\n    mul_by_xy0_fp6(t0, a[0], xy00z0);\n    mul_by_0y0_fp6(t1, a[1], xy00z0[2]);\n\n    /* ret[1] = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1\n              = a0*b1 + a1*b0 */\n    vec_copy(t2[0], xy00z0[0], sizeof(t2[0]));\n    add_fp2(t2[1], xy00z0[1], xy00z0[2]);\n    add_fp6(ret[1], a[0], a[1]);\n    mul_by_xy0_fp6(ret[1], ret[1], t2);\n    sub_fp6(ret[1], ret[1], t0);\n    sub_fp6(ret[1], ret[1], t1);\n\n    /* ret[0] = a0*b0 + a1*b1*v */\n#ifdef mul_by_v_fp6\n    mul_by_v_fp6(t1, t1);\n    add_fp6(ret[0], t0, t1);\n#else\n    mul_by_u_plus_1_fp2(t1[2], t1[2]);\n    add_fp2(ret[0][0], t0[0], t1[2]);\n    add_fp2(ret[0][1], t0[1], t1[0]);\n    add_fp2(ret[0][2], t0[2], t1[1]);\n#endif\n}\n#endif\n\nstatic void sqr_fp12(vec384fp12 ret, const vec384fp12 a)\n{\n    vec384fp6 t0, t1;\n\n    add_fp6(t0, a[0], a[1]);\n#ifdef mul_by_v_fp6\n    mul_by_v_fp6(t1, a[1]);\n    add_fp6(t1, a[0], t1);\n#else\n    mul_by_u_plus_1_fp2(t1[2], a[1][2]);\n    add_fp2(t1[0], a[0][0], t1[2]);\n    add_fp2(t1[1], a[0][1], a[1][0]);\n    add_fp2(t1[2], a[0][2], a[1][1]);\n#endif\n    mul_fp6(t0, t0, t1);\n    mul_fp6(t1, a[0], a[1]);\n\n    /* ret[1] = 2*(a0*a1) */\n    add_fp6(ret[1], t1, t1);\n\n    /* ret[0] = (a0 + a1)*(a0 + a1*v) - a0*a1 - a0*a1*v\n              = a0^2 + a1^2*v 
*/\n    sub_fp6(ret[0], t0, t1);\n#ifdef mul_by_v_fp6\n    mul_by_v_fp6(t1, t1);\n    sub_fp6(ret[0], ret[0], t1);\n#else\n    mul_by_u_plus_1_fp2(t1[2], t1[2]);\n    sub_fp2(ret[0][0], ret[0][0], t1[2]);\n    sub_fp2(ret[0][1], ret[0][1], t1[0]);\n    sub_fp2(ret[0][2], ret[0][2], t1[1]);\n#endif\n}\n\nstatic void conjugate_fp12(vec384fp12 a)\n{   neg_fp6(a[1], a[1]);   }\n\nstatic void inverse_fp6(vec384fp6 ret, const vec384fp6 a)\n{\n    vec384x c0, c1, c2, t0, t1;\n\n    /* c0 = a0^2 - (a1*a2)*(u+1) */\n    sqr_fp2(c0, a[0]);\n    mul_fp2(t0, a[1], a[2]);\n    mul_by_u_plus_1_fp2(t0, t0);\n    sub_fp2(c0, c0, t0);\n\n    /* c1 = a2^2*(u+1) - (a0*a1) */\n    sqr_fp2(c1, a[2]);\n    mul_by_u_plus_1_fp2(c1, c1);\n    mul_fp2(t0, a[0], a[1]);\n    sub_fp2(c1, c1, t0);\n\n    /* c2 = a1^2 - a0*a2 */\n    sqr_fp2(c2, a[1]);\n    mul_fp2(t0, a[0], a[2]);\n    sub_fp2(c2, c2, t0);\n\n    /* (a2*c1 + a1*c2)*(u+1) + a0*c0 */\n    mul_fp2(t0, c1, a[2]);\n    mul_fp2(t1, c2, a[1]);\n    add_fp2(t0, t0, t1);\n    mul_by_u_plus_1_fp2(t0, t0);\n    mul_fp2(t1, c0, a[0]);\n    add_fp2(t0, t0, t1);\n\n    reciprocal_fp2(t1, t0);\n\n    mul_fp2(ret[0], c0, t1);\n    mul_fp2(ret[1], c1, t1);\n    mul_fp2(ret[2], c2, t1);\n}\n\nstatic void inverse_fp12(vec384fp12 ret, const vec384fp12 a)\n{\n    vec384fp6 t0, t1;\n\n    sqr_fp6(t0, a[0]);\n    sqr_fp6(t1, a[1]);\n#ifdef mul_by_v_fp6\n    mul_by_v_fp6(t1, t1);\n    sub_fp6(t0, t0, t1);\n#else\n    mul_by_u_plus_1_fp2(t1[2], t1[2]);\n    sub_fp2(t0[0], t0[0], t1[2]);\n    sub_fp2(t0[1], t0[1], t1[0]);\n    sub_fp2(t0[2], t0[2], t1[1]);\n#endif\n\n    inverse_fp6(t1, t0);\n\n    mul_fp6(ret[0], a[0], t1);\n    mul_fp6(ret[1], a[1], t1);\n    neg_fp6(ret[1], ret[1]);\n}\n\ntypedef vec384x vec384fp4[2];\n\n#if defined(__FP2x2__)\nstatic void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1)\n{\n    vec768x t0, t1, t2;\n\n    sqr_fp2x2(t0, a0);\n    sqr_fp2x2(t1, a1);\n    add_fp2(ret[1], a0, a1);\n\n    
mul_by_u_plus_1_fp2x2(t2, t1);\n    add_fp2x2(t2, t2, t0);\n    redc_fp2x2(ret[0], t2);\n\n    sqr_fp2x2(t2, ret[1]);\n    sub_fp2x2(t2, t2, t0);\n    sub_fp2x2(t2, t2, t1);\n    redc_fp2x2(ret[1], t2);\n}\n#else\nstatic void sqr_fp4(vec384fp4 ret, const vec384x a0, const vec384x a1)\n{\n    vec384x t0, t1;\n\n    sqr_fp2(t0, a0);\n    sqr_fp2(t1, a1);\n    add_fp2(ret[1], a0, a1);\n\n    mul_by_u_plus_1_fp2(ret[0], t1);\n    add_fp2(ret[0], ret[0], t0);\n\n    sqr_fp2(ret[1], ret[1]);\n    sub_fp2(ret[1], ret[1], t0);\n    sub_fp2(ret[1], ret[1], t1);\n}\n#endif\n\nstatic void cyclotomic_sqr_fp12(vec384fp12 ret, const vec384fp12 a)\n{\n    vec384fp4 t0, t1, t2;\n\n    sqr_fp4(t0, a[0][0], a[1][1]);\n    sqr_fp4(t1, a[1][0], a[0][2]);\n    sqr_fp4(t2, a[0][1], a[1][2]);\n\n    sub_fp2(ret[0][0], t0[0],     a[0][0]);\n    add_fp2(ret[0][0], ret[0][0], ret[0][0]);\n    add_fp2(ret[0][0], ret[0][0], t0[0]);\n\n    sub_fp2(ret[0][1], t1[0],     a[0][1]);\n    add_fp2(ret[0][1], ret[0][1], ret[0][1]);\n    add_fp2(ret[0][1], ret[0][1], t1[0]);\n\n    sub_fp2(ret[0][2], t2[0],     a[0][2]);\n    add_fp2(ret[0][2], ret[0][2], ret[0][2]);\n    add_fp2(ret[0][2], ret[0][2], t2[0]);\n\n    mul_by_u_plus_1_fp2(t2[1], t2[1]);\n    add_fp2(ret[1][0], t2[1],     a[1][0]);\n    add_fp2(ret[1][0], ret[1][0], ret[1][0]);\n    add_fp2(ret[1][0], ret[1][0], t2[1]);\n\n    add_fp2(ret[1][1], t0[1],     a[1][1]);\n    add_fp2(ret[1][1], ret[1][1], ret[1][1]);\n    add_fp2(ret[1][1], ret[1][1], t0[1]);\n\n    add_fp2(ret[1][2], t1[1],     a[1][2]);\n    add_fp2(ret[1][2], ret[1][2], ret[1][2]);\n    add_fp2(ret[1][2], ret[1][2], t1[1]);\n}\n\n/*\n * caveat lector! 
|n| has to be non-zero and not more than 3!\n */\nstatic inline void frobenius_map_fp2(vec384x ret, const vec384x a, size_t n)\n{\n    vec_copy(ret[0], a[0], sizeof(ret[0]));\n    cneg_fp(ret[1], a[1], n & 1);\n}\n\nstatic void frobenius_map_fp6(vec384fp6 ret, const vec384fp6 a, size_t n)\n{\n    static const vec384x coeffs1[] = {  /* (u + 1)^((P^n - 1) / 3) */\n      { { 0 },\n        { TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2),\n          TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e),\n          TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741) } },\n      { { TO_LIMB_T(0x30f1361b798a64e8), TO_LIMB_T(0xf3b8ddab7ece5a2a),\n          TO_LIMB_T(0x16a8ca3ac61577f7), TO_LIMB_T(0xc26a2ff874fd029b),\n          TO_LIMB_T(0x3636b76660701c6e), TO_LIMB_T(0x051ba4ab241b6160) } },\n      { { 0 }, { ONE_MONT_P } }\n    };\n    static const vec384 coeffs2[] = {  /* (u + 1)^((2P^n - 2) / 3) */\n      {   TO_LIMB_T(0x890dc9e4867545c3), TO_LIMB_T(0x2af322533285a5d5),\n          TO_LIMB_T(0x50880866309b7e2c), TO_LIMB_T(0xa20d1b8c7e881024),\n          TO_LIMB_T(0x14e4f04fe2db9068), TO_LIMB_T(0x14e56d3f1564853a)   },\n      {   TO_LIMB_T(0xcd03c9e48671f071), TO_LIMB_T(0x5dab22461fcda5d2),\n          TO_LIMB_T(0x587042afd3851b95), TO_LIMB_T(0x8eb60ebe01bacb9e),\n          TO_LIMB_T(0x03f97d6e83d050d2), TO_LIMB_T(0x18f0206554638741)   },\n      {   TO_LIMB_T(0x43f5fffffffcaaae), TO_LIMB_T(0x32b7fff2ed47fffd),\n          TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a),\n          TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206)   }\n    };\n\n    frobenius_map_fp2(ret[0], a[0], n);\n    frobenius_map_fp2(ret[1], a[1], n);\n    frobenius_map_fp2(ret[2], a[2], n);\n    --n;    /* implied ONE_MONT_P at index 0 */\n    mul_fp2(ret[1], ret[1], coeffs1[n]);\n    mul_fp(ret[2][0], ret[2][0], coeffs2[n]);\n    mul_fp(ret[2][1], ret[2][1], coeffs2[n]);\n}\n\nstatic void frobenius_map_fp12(vec384fp12 ret, const vec384fp12 a, 
size_t n)\n{\n    static const vec384x coeffs[] = {  /* (u + 1)^((P^n - 1) / 6) */\n      { { TO_LIMB_T(0x07089552b319d465), TO_LIMB_T(0xc6695f92b50a8313),\n          TO_LIMB_T(0x97e83cccd117228f), TO_LIMB_T(0xa35baecab2dc29ee),\n          TO_LIMB_T(0x1ce393ea5daace4d), TO_LIMB_T(0x08f2220fb0fb66eb) },\n\t{ TO_LIMB_T(0xb2f66aad4ce5d646), TO_LIMB_T(0x5842a06bfc497cec),\n          TO_LIMB_T(0xcf4895d42599d394), TO_LIMB_T(0xc11b9cba40a8e8d0),\n          TO_LIMB_T(0x2e3813cbe5a0de89), TO_LIMB_T(0x110eefda88847faf) } },\n      { { TO_LIMB_T(0xecfb361b798dba3a), TO_LIMB_T(0xc100ddb891865a2c),\n          TO_LIMB_T(0x0ec08ff1232bda8e), TO_LIMB_T(0xd5c13cc6f1ca4721),\n          TO_LIMB_T(0x47222a47bf7b5c04), TO_LIMB_T(0x0110f184e51c5f59) } },\n      { { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183),\n          TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18),\n          TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) },\n\t{ TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),\n          TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),\n          TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) } },\n    };\n\n    frobenius_map_fp6(ret[0], a[0], n);\n    frobenius_map_fp6(ret[1], a[1], n);\n    --n;    /* implied ONE_MONT_P at index 0 */\n    mul_fp2(ret[1][0], ret[1][0], coeffs[n]);\n    mul_fp2(ret[1][1], ret[1][1], coeffs[n]);\n    mul_fp2(ret[1][2], ret[1][2], coeffs[n]);\n}\n\n\n/*\n * BLS12-381-specific Fp12 shortcuts.\n */\nvoid blst_fp12_sqr(vec384fp12 ret, const vec384fp12 a)\n{   sqr_fp12(ret, a);   }\n\nvoid blst_fp12_cyclotomic_sqr(vec384fp12 ret, const vec384fp12 a)\n{   cyclotomic_sqr_fp12(ret, a);   }\n\nvoid blst_fp12_mul(vec384fp12 ret, const vec384fp12 a, const vec384fp12 b)\n{   mul_fp12(ret, a, b);   }\n\nvoid blst_fp12_mul_by_xy00z0(vec384fp12 ret, const vec384fp12 a,\n                                             const vec384fp6 xy00z0)\n{   mul_by_xy00z0_fp12(ret, a, xy00z0);   
}\n\nvoid blst_fp12_conjugate(vec384fp12 a)\n{   conjugate_fp12(a);   }\n\nvoid blst_fp12_inverse(vec384fp12 ret, const vec384fp12 a)\n{   inverse_fp12(ret, a);   }\n\n/* caveat lector! |n| has to be non-zero and not more than 3! */\nvoid blst_fp12_frobenius_map(vec384fp12 ret, const vec384fp12 a, size_t n)\n{   frobenius_map_fp12(ret, a, n);   }\n\nint blst_fp12_is_equal(const vec384fp12 a, const vec384fp12 b)\n{   return (int)vec_is_equal(a, b, sizeof(vec384fp12));   }\n\nint blst_fp12_is_one(const vec384fp12 a)\n{\n    return (int)(vec_is_equal(a[0][0], BLS12_381_Rx.p2, sizeof(a[0][0])) &\n                 vec_is_zero(a[0][1], sizeof(vec384fp12) - sizeof(a[0][0])));\n}\n\nconst vec384fp12 *blst_fp12_one(void)\n{   return (const vec384fp12 *)BLS12_381_Rx.p12;   }\n\nvoid blst_bendian_from_fp12(unsigned char ret[48*12], const vec384fp12 a)\n{\n    size_t i, j;\n    vec384 out;\n\n    for (i = 0; i < 3; i++) {\n        for (j = 0; j < 2; j++) {\n            from_fp(out, a[j][i][0]);\n            be_bytes_from_limbs(ret, out, sizeof(vec384));  ret += 48;\n            from_fp(out, a[j][i][1]);\n            be_bytes_from_limbs(ret, out, sizeof(vec384));  ret += 48;\n        }\n    }\n}\n\nsize_t blst_fp12_sizeof(void)\n{   return sizeof(vec384fp12);   }\n"
  },
  {
    "path": "src/hash_to_field.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"consts.h\"\n#include \"sha256.h\"\n\nstatic const vec384 BLS12_381_RRRR = {  /* RR^2 */\n    TO_LIMB_T(0xed48ac6bd94ca1e0), TO_LIMB_T(0x315f831e03a7adf8),\n    TO_LIMB_T(0x9a53352a615e29dd), TO_LIMB_T(0x34c04e5e921e1761),\n    TO_LIMB_T(0x2512d43565724728), TO_LIMB_T(0x0aa6346091755d4d)\n};\n\n#ifdef expand_message_xmd\nvoid expand_message_xmd(unsigned char *bytes, size_t len_in_bytes,\n                        const unsigned char *aug, size_t aug_len,\n                        const unsigned char *msg, size_t msg_len,\n                        const unsigned char *DST, size_t DST_len);\n#else\nstatic void sha256_init_Zpad(SHA256_CTX *ctx)\n{\n    ctx->h[0] = 0xda5698beU;\n    ctx->h[1] = 0x17b9b469U;\n    ctx->h[2] = 0x62335799U;\n    ctx->h[3] = 0x779fbecaU;\n    ctx->h[4] = 0x8ce5d491U;\n    ctx->h[5] = 0xc0d26243U;\n    ctx->h[6] = 0xbafef9eaU;\n    ctx->h[7] = 0x1837a9d8U;\n    ctx->N = 64;\n    vec_zero(ctx->buf, sizeof(ctx->buf));\n    ctx->off = 0;\n}\n\nstatic void vec_xor(void *restrict ret, const void *restrict a,\n                                        const void *restrict b, size_t num)\n{\n    limb_t *rp = (limb_t *)ret;\n    const limb_t *ap = (const limb_t *)a;\n    const limb_t *bp = (const limb_t *)b;\n    size_t i;\n\n    num /= sizeof(limb_t);\n\n    for (i = 0; i < num; i++)\n        rp[i] = ap[i] ^ bp[i];\n}\n\nstatic void expand_message_xmd(unsigned char *bytes, size_t len_in_bytes,\n                               const unsigned char *aug, size_t aug_len,\n                               const unsigned char *msg, size_t msg_len,\n                               const unsigned char *DST, size_t DST_len)\n{\n    union { limb_t align; unsigned char c[32]; } b_0;\n    union { limb_t align; unsigned char c[33+256+31]; } b_i;\n    unsigned char *p;\n    size_t i, 
b_i_bits, b_i_blocks;\n    SHA256_CTX ctx;\n\n    /*\n     * compose template for 'strxor(b_0, b_(i-1)) || I2OSP(i, 1) || DST_prime'\n     */\n    if (DST_len > 255) {\n        sha256_init(&ctx);\n        sha256_update(&ctx, \"H2C-OVERSIZE-DST-\", 17);\n        sha256_update(&ctx, DST, DST_len);\n        sha256_final(b_0.c, &ctx);\n        DST = b_0.c, DST_len = 32;\n    }\n    b_i_blocks = ((33 + DST_len + 1 + 9) + 63) & -64;\n    vec_zero(b_i.c + b_i_blocks - 64, 64);\n\n    p = b_i.c + 33;\n    for (i = 0; i < DST_len; i++)\n        p[i] = DST[i];\n    p[i++] = (unsigned char)DST_len;\n    p[i++] = 0x80;\n    p[i+6] = p[i+5] = p[i+4] = p[i+3] = p[i+2] = p[i+1] = p[i+0] = 0;\n    b_i_bits = (33 + DST_len + 1) * 8;\n    p = b_i.c + b_i_blocks;\n    p[-2] = (unsigned char)(b_i_bits >> 8);\n    p[-1] = (unsigned char)(b_i_bits);\n\n    sha256_init_Zpad(&ctx);                         /* Z_pad | */\n    sha256_update(&ctx, aug, aug_len);              /* | aug | */\n    sha256_update(&ctx, msg, msg_len);              /* | msg | */\n    /* | I2OSP(len_in_bytes, 2) || I2OSP(0, 1) || DST_prime    */\n    b_i.c[30] = (unsigned char)(len_in_bytes >> 8);\n    b_i.c[31] = (unsigned char)(len_in_bytes);\n    b_i.c[32] = 0;\n    sha256_update(&ctx, b_i.c + 30, 3 + DST_len + 1);\n    sha256_final(b_0.c, &ctx);\n\n    sha256_init_h(ctx.h);\n    vec_copy(b_i.c, b_0.c, 32);\n    ++b_i.c[32];\n    sha256_block_data_order(ctx.h, b_i.c, b_i_blocks / 64);\n    sha256_emit(bytes, ctx.h);\n\n    len_in_bytes += 31; /* ell = ceil(len_in_bytes / b_in_bytes), with */\n    len_in_bytes /= 32; /* caller being responsible for accordingly large\n                         * buffer. hash_to_field passes one with length\n                         * divisible by 64, remember? which works... 
*/\n    while (--len_in_bytes) {\n        sha256_init_h(ctx.h);\n        vec_xor(b_i.c, b_0.c, bytes, 32);\n        bytes += 32;\n        ++b_i.c[32];\n        sha256_block_data_order(ctx.h, b_i.c, b_i_blocks / 64);\n        sha256_emit(bytes, ctx.h);\n    }\n}\n#endif\n\n/*\n * |nelems| is 'count * m' from spec\n */\nstatic void hash_to_field(vec384 elems[], size_t nelems,\n                          const unsigned char *aug, size_t aug_len,\n                          const unsigned char *msg, size_t msg_len,\n                          const unsigned char *DST, size_t DST_len)\n{\n    size_t L = sizeof(vec384) + 128/8;  /* ceil((ceil(log2(p)) + k) / 8) */\n    size_t len_in_bytes = L * nelems;   /* divisible by 64, hurray!      */\n#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \\\n                               || defined(__STDC_NO_VLA__)\n    limb_t *pseudo_random = alloca(len_in_bytes);\n#else\n    limb_t pseudo_random[len_in_bytes/sizeof(limb_t)];\n#endif\n    unsigned char *bytes;\n    vec768 elem;\n\n    aug_len = aug!=NULL ? aug_len : 0;\n    DST_len = DST!=NULL ? 
DST_len : 0;\n\n    expand_message_xmd((unsigned char *)pseudo_random, len_in_bytes,\n                       aug, aug_len, msg, msg_len, DST, DST_len);\n\n    vec_zero(elem, sizeof(elem));\n    bytes = (unsigned char *)pseudo_random;\n    while (nelems--) {\n        limbs_from_be_bytes(elem, bytes, L);\n        bytes += L;\n        /*\n         * L-bytes block % P, output is in Montgomery domain...\n         */\n        redc_mont_384(elems[0], elem, BLS12_381_P, p0);\n        mul_mont_384(elems[0], elems[0], BLS12_381_RRRR, BLS12_381_P, p0);\n        elems++;\n    }\n}\n\nvoid blst_expand_message_xmd(unsigned char *bytes, size_t len_in_bytes,\n                             const unsigned char *msg, size_t msg_len,\n                             const unsigned char *DST, size_t DST_len)\n{\n    size_t buf_len = (len_in_bytes+31) & ((size_t)0-32);\n    unsigned char *buf_ptr = bytes;\n\n    if (buf_len > 255*32)\n        return;\n\n    if (buf_len != len_in_bytes)\n        buf_ptr = alloca(buf_len);\n\n    expand_message_xmd(buf_ptr, len_in_bytes, NULL, 0, msg, msg_len,\n                                              DST, DST_len);\n    if (buf_ptr != bytes) {\n        unsigned char *ptr = buf_ptr;\n        while (len_in_bytes--)\n            *bytes++ = *ptr++;\n        vec_zero(buf_ptr, buf_len);\n    }\n}\n"
  },
  {
    "path": "src/keygen.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"consts.h\"\n#include \"bytes.h\"\n#include \"sha256.h\"\n\ntypedef struct {\n    SHA256_CTX ctx;\n    unsigned int h_ipad[8];\n    unsigned int h_opad[8];\n    union { limb_t l[64/sizeof(limb_t)]; unsigned char c[64]; } tail;\n} HMAC_SHA256_CTX;\n\nstatic void HMAC_init(HMAC_SHA256_CTX *ctx, const void *K, size_t K_len)\n{\n    size_t i;\n\n    if (K == NULL) {            /* reuse h_ipad and h_opad */\n        sha256_hcopy(ctx->ctx.h, ctx->h_ipad);\n        ctx->ctx.N = 64;\n        vec_zero(ctx->ctx.buf, sizeof(ctx->ctx.buf));\n        ctx->ctx.off = 0;\n\n        return;\n    }\n\n    vec_zero(ctx->tail.c, sizeof(ctx->tail));\n    if (K_len > 64) {\n        sha256_init(&ctx->ctx);\n        sha256_update(&ctx->ctx, K, K_len);\n        sha256_final(ctx->tail.c, &ctx->ctx);\n    } else if (K_len != 0) {\n        sha256_bcopy(ctx->tail.c, K, K_len);\n    }\n\n    for (i = 0; i < 64/sizeof(limb_t); i++)\n        ctx->tail.l[i] ^= (limb_t)0x3636363636363636;\n\n    sha256_init(&ctx->ctx);\n    sha256_update(&ctx->ctx, ctx->tail.c, 64);\n    sha256_hcopy(ctx->h_ipad, ctx->ctx.h);\n\n    for (i = 0; i < 64/sizeof(limb_t); i++)\n        ctx->tail.l[i] ^= (limb_t)(0x3636363636363636 ^ 0x5c5c5c5c5c5c5c5c);\n\n    sha256_init_h(ctx->h_opad);\n    sha256_block_data_order(ctx->h_opad, ctx->tail.c, 1);\n\n    vec_zero(ctx->tail.c, sizeof(ctx->tail));\n    ctx->tail.c[32] = 0x80;\n    ctx->tail.c[62] = 3;        /* (64+32)*8 in big endian */\n    ctx->tail.c[63] = 0;\n}\n\nstatic void HMAC_update(HMAC_SHA256_CTX *ctx, const unsigned char *inp,\n                                              size_t len)\n{   sha256_update(&ctx->ctx, inp, len);   }\n\nstatic void HMAC_final(unsigned char md[32], HMAC_SHA256_CTX *ctx)\n{\n    sha256_final(ctx->tail.c, &ctx->ctx);\n    sha256_hcopy(ctx->ctx.h, 
ctx->h_opad);\n    sha256_block_data_order(ctx->ctx.h, ctx->tail.c, 1);\n    sha256_emit(md, ctx->ctx.h);\n}\n\nstatic void HKDF_Extract(unsigned char PRK[32],\n                         const void *salt, size_t salt_len,\n                         const void *IKM,  size_t IKM_len,\n#ifndef __BLST_HKDF_TESTMODE__\n                         int IKM_fixup,\n#endif\n                         HMAC_SHA256_CTX *ctx)\n{\n    unsigned char zero[1] = { 0 };\n\n    HMAC_init(ctx, salt != NULL ? salt : zero, salt_len);\n    HMAC_update(ctx, IKM, IKM_len);\n#ifndef __BLST_HKDF_TESTMODE__\n    if (IKM_fixup) {\n        /* Section 2.3 KeyGen in BLS-signature draft */\n        HMAC_update(ctx, zero, 1);\n    }\n#endif\n    HMAC_final(PRK, ctx);\n}\n\nstatic void HKDF_Expand(unsigned char *OKM, size_t L,\n                        const unsigned char PRK[32],\n                        const void *info, size_t info_len,\n#ifndef __BLST_HKDF_TESTMODE__\n                        int info_fixup,\n#endif\n                        HMAC_SHA256_CTX *ctx)\n{\n#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \\\n                               || defined(__STDC_NO_VLA__)\n    unsigned char *info_prime = alloca(info_len + 2 + 1);\n#else\n    unsigned char info_prime[info_len + 2 + 1];\n#endif\n\n    HMAC_init(ctx, PRK, 32);\n\n    if (info_len != 0)\n        sha256_bcopy(info_prime, info, info_len);\n#ifndef __BLST_HKDF_TESTMODE__\n    if (info_fixup) {\n        /* Section 2.3 KeyGen in BLS-signature draft */\n        info_prime[info_len + 0] = (unsigned char)(L >> 8);\n        info_prime[info_len + 1] = (unsigned char)(L);\n        info_len += 2;\n    }\n#endif\n    info_prime[info_len] = 1;   /* counter */\n    HMAC_update(ctx, info_prime, info_len + 1);\n    HMAC_final(ctx->tail.c, ctx);\n    while (L > 32) {\n        sha256_hcopy((unsigned int *)OKM, (const unsigned int *)ctx->tail.c);\n        OKM += 32; L -= 32;\n        ++info_prime[info_len]; /* counter */\n        HMAC_init(ctx, 
NULL, 0);\n        HMAC_update(ctx, ctx->tail.c, 32);\n        HMAC_update(ctx, info_prime, info_len + 1);\n        HMAC_final(ctx->tail.c, ctx);\n    }\n    sha256_bcopy(OKM, ctx->tail.c, L);\n}\n\n#ifndef __BLST_HKDF_TESTMODE__\nstatic void keygen(pow256 SK, const void *IKM, size_t IKM_len,\n                              const void *salt, size_t salt_len,\n                              const void *info, size_t info_len,\n                              int version)\n{\n    struct {\n        HMAC_SHA256_CTX ctx;\n        unsigned char PRK[32], OKM[48];\n        vec512 key;\n    } scratch;\n    unsigned char salt_prime[32] = \"BLS-SIG-KEYGEN-SALT-\";\n\n    if (IKM_len < 32 || (version > 4 && salt == NULL)) {\n        vec_zero(SK, sizeof(pow256));\n        return;\n    }\n\n    /*\n     * Vet |info| since some callers were caught to be sloppy, e.g.\n     * SWIG-4.0-generated Python wrapper...\n     */\n    info_len = info==NULL ? 0 : info_len;\n\n    if (salt == NULL) {\n        salt = salt_prime;\n        salt_len = 20;\n    }\n\n    if (version == 4) {\n        /* salt = H(salt) */\n        sha256_init(&scratch.ctx.ctx);\n        sha256_update(&scratch.ctx.ctx, salt, salt_len);\n        sha256_final(salt_prime, &scratch.ctx.ctx);\n        salt = salt_prime;\n        salt_len = sizeof(salt_prime);\n    }\n\n    while (1) {\n        /* PRK = HKDF-Extract(salt, IKM || I2OSP(0, 1)) */\n        HKDF_Extract(scratch.PRK, salt, salt_len,\n                                  IKM, IKM_len, 1, &scratch.ctx);\n\n        /* OKM = HKDF-Expand(PRK, key_info || I2OSP(L, 2), L) */\n        HKDF_Expand(scratch.OKM, sizeof(scratch.OKM), scratch.PRK,\n                    info, info_len, 1, &scratch.ctx);\n\n        /* SK = OS2IP(OKM) mod r */\n        vec_zero(scratch.key, sizeof(scratch.key));\n        limbs_from_be_bytes(scratch.key, scratch.OKM, sizeof(scratch.OKM));\n        redc_mont_256(scratch.key, scratch.key, BLS12_381_r, r0);\n        /*\n         * Given that 
mul_mont_sparse_256 has special boundary conditions\n         * it's appropriate to mention that redc_mont_256 output is fully\n         * reduced at this point. Because we started with 384-bit input,\n         * one with most significant half smaller than the modulus.\n         */\n        mul_mont_sparse_256(scratch.key, scratch.key, BLS12_381_rRR,\n                            BLS12_381_r, r0);\n\n        if (version < 4 || !vec_is_zero(scratch.key, sizeof(vec256)))\n            break;\n\n        /* salt = H(salt) */\n        sha256_init(&scratch.ctx.ctx);\n        sha256_update(&scratch.ctx.ctx, salt, salt_len);\n        sha256_final(salt_prime, &scratch.ctx.ctx);\n        salt = salt_prime;\n        salt_len = sizeof(salt_prime);\n    }\n\n    le_bytes_from_limbs(SK, scratch.key, sizeof(pow256));\n\n    /*\n     * scrub the stack just in case next callee inadvertently flashes\n     * a fragment across application boundary...\n     */\n    vec_zero(&scratch, sizeof(scratch));\n}\n\nvoid blst_keygen(pow256 SK, const void *IKM, size_t IKM_len,\n                            const void *info, size_t info_len)\n{   keygen(SK, IKM, IKM_len, NULL, 0, info, info_len, 4);   }\n\nvoid blst_keygen_v3(pow256 SK, const void *IKM, size_t IKM_len,\n                               const void *info, size_t info_len)\n{   keygen(SK, IKM, IKM_len, NULL, 0, info, info_len, 3);   }\n\nvoid blst_keygen_v4_5(pow256 SK, const void *IKM, size_t IKM_len,\n                                 const void *salt, size_t salt_len,\n                                 const void *info, size_t info_len)\n{   keygen(SK, IKM, IKM_len, salt, salt_len, info, info_len, 4);   }\n\nvoid blst_keygen_v5(pow256 SK, const void *IKM, size_t IKM_len,\n                               const void *salt, size_t salt_len,\n                               const void *info, size_t info_len)\n{   keygen(SK, IKM, IKM_len, salt, salt_len, info, info_len, 5);   }\n\n/*\n * https://eips.ethereum.org/EIPS/eip-2333\n */\nvoid 
blst_derive_master_eip2333(pow256 SK, const void *seed, size_t seed_len)\n{   keygen(SK, seed, seed_len, NULL, 0, NULL, 0, 4);   }\n\nstatic void parent_SK_to_lamport_PK(pow256 PK, const pow256 parent_SK,\n                                    unsigned int index)\n{\n    size_t i;\n    struct {\n        HMAC_SHA256_CTX ctx;\n        SHA256_CTX ret;\n        unsigned char PRK[32], IKM[32];\n        unsigned char lamport[255][32];\n    } scratch;\n\n    /* salt = I2OSP(index, 4) */\n    unsigned char salt[4] = { (unsigned char)(index>>24),\n                              (unsigned char)(index>>16),\n                              (unsigned char)(index>>8),\n                              (unsigned char)(index) };\n\n    /* IKM = I2OSP(parent_SK, 32) */\n    for (i = 0; i < 32; i++)\n        scratch.IKM[i] = parent_SK[31-i];\n\n    /* lamport_0 = IKM_to_lamport_SK(IKM, salt) */\n    HKDF_Extract(scratch.PRK, salt, sizeof(salt), scratch.IKM, 32, 0,\n                 &scratch.ctx);\n    HKDF_Expand(scratch.lamport[0], sizeof(scratch.lamport),\n                scratch.PRK, NULL, 0, 0, &scratch.ctx);\n\n    vec_zero(scratch.ctx.ctx.buf, sizeof(scratch.ctx.ctx.buf));\n    scratch.ctx.ctx.buf[32] = 0x80;\n    scratch.ctx.ctx.buf[62] = 1;    /* 32*8 in big endian */\n    scratch.ctx.ctx.buf[63] = 0;\n    for (i = 0; i < 255; i++) {\n        /* lamport_PK = lamport_PK | SHA256(lamport_0[i]) */\n        sha256_init_h(scratch.ctx.ctx.h);\n        sha256_bcopy(scratch.ctx.ctx.buf, scratch.lamport[i], 32);\n        sha256_block_data_order(scratch.ctx.ctx.h, scratch.ctx.ctx.buf, 1);\n        sha256_emit(scratch.lamport[i], scratch.ctx.ctx.h);\n    }\n\n    /* compressed_lamport_PK = SHA256(lamport_PK) */\n    sha256_init(&scratch.ret);\n    sha256_update(&scratch.ret, scratch.lamport, sizeof(scratch.lamport));\n\n    /* not_IKM = flip_bits(IKM) */\n    for (i = 0; i< 32; i++)\n        scratch.IKM[i] = ~scratch.IKM[i];\n\n    /* lamport_1 = IKM_to_lamport_SK(not_IKM, salt) */\n    
HKDF_Extract(scratch.PRK, salt, sizeof(salt), scratch.IKM, 32, 0,\n                 &scratch.ctx);\n    HKDF_Expand(scratch.lamport[0], sizeof(scratch.lamport),\n                scratch.PRK, NULL, 0, 0, &scratch.ctx);\n\n    vec_zero(scratch.ctx.ctx.buf, sizeof(scratch.ctx.ctx.buf));\n    scratch.ctx.ctx.buf[32] = 0x80;\n    scratch.ctx.ctx.buf[62] = 1;\n    for (i = 0; i < 255; i++) {\n        /* lamport_PK = lamport_PK | SHA256(lamport_1[i]) */\n        sha256_init_h(scratch.ctx.ctx.h);\n        sha256_bcopy(scratch.ctx.ctx.buf, scratch.lamport[i], 32);\n        sha256_block_data_order(scratch.ctx.ctx.h, scratch.ctx.ctx.buf, 1);\n        sha256_emit(scratch.lamport[i], scratch.ctx.ctx.h);\n    }\n\n    /* compressed_lamport_PK = SHA256(lamport_PK) */\n    sha256_update(&scratch.ret, scratch.lamport, sizeof(scratch.lamport));\n    sha256_final(PK, &scratch.ret);\n\n    /*\n     * scrub the stack just in case next callee inadvertently flashes\n     * a fragment across application boundary...\n     */\n    vec_zero(&scratch, sizeof(scratch));\n}\n\nvoid blst_derive_child_eip2333(pow256 SK, const pow256 parent_SK,\n                               unsigned int child_index)\n{\n    parent_SK_to_lamport_PK(SK, parent_SK, child_index);\n    keygen(SK, SK, sizeof(pow256), NULL, 0, NULL, 0, 4);\n}\n#endif\n"
  },
  {
    "path": "src/map_to_g1.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"point.h\"\n#include \"fields.h\"\n\n/*\n * y^2 = x^3 + A'*x + B', isogenous one\n */\nstatic const vec384 Aprime_E1 = {\n    /* (0x00144698a3b8e9433d693a02c96d4982b0ea985383ee66a8\n          d8e8981aefd881ac98936f8da0e0f97f5cf428082d584c1d << 384) % P */\n    TO_LIMB_T(0x2f65aa0e9af5aa51), TO_LIMB_T(0x86464c2d1e8416c3),\n    TO_LIMB_T(0xb85ce591b7bd31e2), TO_LIMB_T(0x27e11c91b5f24e7c),\n    TO_LIMB_T(0x28376eda6bfc1835), TO_LIMB_T(0x155455c3e5071d85)\n};\nstatic const vec384 Bprime_E1 = {\n    /* (0x12e2908d11688030018b12e8753eee3b2016c1f0f24f4070\n          a0b9c14fcef35ef55a23215a316ceaa5d1cc48e98e172be0 << 384) % P */\n    TO_LIMB_T(0xfb996971fe22a1e0), TO_LIMB_T(0x9aa93eb35b742d6f),\n    TO_LIMB_T(0x8c476013de99c5c4), TO_LIMB_T(0x873e27c3a221e571),\n    TO_LIMB_T(0xca72b5e45a52d888), TO_LIMB_T(0x06824061418a386b)\n};\n\nstatic void map_fp_times_Zz(vec384 map[], const vec384 isogeny_map[],\n                            const vec384 Zz_powers[], size_t n)\n{\n    while (n--)\n        mul_fp(map[n], isogeny_map[n], Zz_powers[n]);\n}\n\nstatic void map_fp(vec384 acc, const vec384 x, const vec384 map[], size_t n)\n{\n    while (n--) {\n        mul_fp(acc, acc, x);\n        add_fp(acc, acc, map[n]);\n    }\n}\n\nstatic void isogeny_map_to_E1(POINTonE1 *out, const POINTonE1 *p)\n{\n    /*\n     * x = x_num / x_den, where\n     * x_num = k_(1,11) * x'^11 + k_(1,10) * x'^10 + k_(1,9) * x'^9 +\n     *         ... 
+ k_(1,0)\n     * ...\n     */\n    static const vec384 isogeny_map_x_num[] = { /*  (k_(1,*)<<384) % P  */\n      { TO_LIMB_T(0x4d18b6f3af00131c), TO_LIMB_T(0x19fa219793fee28c),\n        TO_LIMB_T(0x3f2885f1467f19ae), TO_LIMB_T(0x23dcea34f2ffb304),\n        TO_LIMB_T(0xd15b58d2ffc00054), TO_LIMB_T(0x0913be200a20bef4)  },\n      { TO_LIMB_T(0x898985385cdbbd8b), TO_LIMB_T(0x3c79e43cc7d966aa),\n        TO_LIMB_T(0x1597e193f4cd233a), TO_LIMB_T(0x8637ef1e4d6623ad),\n        TO_LIMB_T(0x11b22deed20d827b), TO_LIMB_T(0x07097bc5998784ad)  },\n      { TO_LIMB_T(0xa542583a480b664b), TO_LIMB_T(0xfc7169c026e568c6),\n        TO_LIMB_T(0x5ba2ef314ed8b5a6), TO_LIMB_T(0x5b5491c05102f0e7),\n        TO_LIMB_T(0xdf6e99707d2a0079), TO_LIMB_T(0x0784151ed7605524)  },\n      { TO_LIMB_T(0x494e212870f72741), TO_LIMB_T(0xab9be52fbda43021),\n        TO_LIMB_T(0x26f5577994e34c3d), TO_LIMB_T(0x049dfee82aefbd60),\n        TO_LIMB_T(0x65dadd7828505289), TO_LIMB_T(0x0e93d431ea011aeb)  },\n      { TO_LIMB_T(0x90ee774bd6a74d45), TO_LIMB_T(0x7ada1c8a41bfb185),\n        TO_LIMB_T(0x0f1a8953b325f464), TO_LIMB_T(0x104c24211be4805c),\n        TO_LIMB_T(0x169139d319ea7a8f), TO_LIMB_T(0x09f20ead8e532bf6)  },\n      { TO_LIMB_T(0x6ddd93e2f43626b7), TO_LIMB_T(0xa5482c9aa1ccd7bd),\n        TO_LIMB_T(0x143245631883f4bd), TO_LIMB_T(0x2e0a94ccf77ec0db),\n        TO_LIMB_T(0xb0282d480e56489f), TO_LIMB_T(0x18f4bfcbb4368929)  },\n      { TO_LIMB_T(0x23c5f0c953402dfd), TO_LIMB_T(0x7a43ff6958ce4fe9),\n        TO_LIMB_T(0x2c390d3d2da5df63), TO_LIMB_T(0xd0df5c98e1f9d70f),\n        TO_LIMB_T(0xffd89869a572b297), TO_LIMB_T(0x1277ffc72f25e8fe)  },\n      { TO_LIMB_T(0x79f4f0490f06a8a6), TO_LIMB_T(0x85f894a88030fd81),\n        TO_LIMB_T(0x12da3054b18b6410), TO_LIMB_T(0xe2a57f6505880d65),\n        TO_LIMB_T(0xbba074f260e400f1), TO_LIMB_T(0x08b76279f621d028)  },\n      { TO_LIMB_T(0xe67245ba78d5b00b), TO_LIMB_T(0x8456ba9a1f186475),\n        TO_LIMB_T(0x7888bff6e6b33bb4), TO_LIMB_T(0xe21585b9a30f86cb),\n        
TO_LIMB_T(0x05a69cdcef55feee), TO_LIMB_T(0x09e699dd9adfa5ac)  },\n      { TO_LIMB_T(0x0de5c357bff57107), TO_LIMB_T(0x0a0db4ae6b1a10b2),\n        TO_LIMB_T(0xe256bb67b3b3cd8d), TO_LIMB_T(0x8ad456574e9db24f),\n        TO_LIMB_T(0x0443915f50fd4179), TO_LIMB_T(0x098c4bf7de8b6375)  },\n      { TO_LIMB_T(0xe6b0617e7dd929c7), TO_LIMB_T(0xfe6e37d442537375),\n        TO_LIMB_T(0x1dafdeda137a489e), TO_LIMB_T(0xe4efd1ad3f767ceb),\n        TO_LIMB_T(0x4a51d8667f0fe1cf), TO_LIMB_T(0x054fdf4bbf1d821c)  },\n      { TO_LIMB_T(0x72db2a50658d767b), TO_LIMB_T(0x8abf91faa257b3d5),\n        TO_LIMB_T(0xe969d6833764ab47), TO_LIMB_T(0x464170142a1009eb),\n        TO_LIMB_T(0xb14f01aadb30be2f), TO_LIMB_T(0x18ae6a856f40715d)  }\n    };\n    /* ...\n     * x_den = x'^10 + k_(2,9) * x'^9 + k_(2,8) * x'^8 + ... + k_(2,0)\n     */\n    static const vec384 isogeny_map_x_den[] = { /*  (k_(2,*)<<384) % P  */\n      { TO_LIMB_T(0xb962a077fdb0f945), TO_LIMB_T(0xa6a9740fefda13a0),\n        TO_LIMB_T(0xc14d568c3ed6c544), TO_LIMB_T(0xb43fc37b908b133e),\n        TO_LIMB_T(0x9c0b3ac929599016), TO_LIMB_T(0x0165aa6c93ad115f)  },\n      { TO_LIMB_T(0x23279a3ba506c1d9), TO_LIMB_T(0x92cfca0a9465176a),\n        TO_LIMB_T(0x3b294ab13755f0ff), TO_LIMB_T(0x116dda1c5070ae93),\n        TO_LIMB_T(0xed4530924cec2045), TO_LIMB_T(0x083383d6ed81f1ce)  },\n      { TO_LIMB_T(0x9885c2a6449fecfc), TO_LIMB_T(0x4a2b54ccd37733f0),\n        TO_LIMB_T(0x17da9ffd8738c142), TO_LIMB_T(0xa0fba72732b3fafd),\n        TO_LIMB_T(0xff364f36e54b6812), TO_LIMB_T(0x0f29c13c660523e2)  },\n      { TO_LIMB_T(0xe349cc118278f041), TO_LIMB_T(0xd487228f2f3204fb),\n        TO_LIMB_T(0xc9d325849ade5150), TO_LIMB_T(0x43a92bd69c15c2df),\n        TO_LIMB_T(0x1c2c7844bc417be4), TO_LIMB_T(0x12025184f407440c)  },\n      { TO_LIMB_T(0x587f65ae6acb057b), TO_LIMB_T(0x1444ef325140201f),\n        TO_LIMB_T(0xfbf995e71270da49), TO_LIMB_T(0xccda066072436a42),\n        TO_LIMB_T(0x7408904f0f186bb2), TO_LIMB_T(0x13b93c63edf6c015)  },\n      { 
TO_LIMB_T(0xfb918622cd141920), TO_LIMB_T(0x4a4c64423ecaddb4),\n        TO_LIMB_T(0x0beb232927f7fb26), TO_LIMB_T(0x30f94df6f83a3dc2),\n        TO_LIMB_T(0xaeedd424d780f388), TO_LIMB_T(0x06cc402dd594bbeb)  },\n      { TO_LIMB_T(0xd41f761151b23f8f), TO_LIMB_T(0x32a92465435719b3),\n        TO_LIMB_T(0x64f436e888c62cb9), TO_LIMB_T(0xdf70a9a1f757c6e4),\n        TO_LIMB_T(0x6933a38d5b594c81), TO_LIMB_T(0x0c6f7f7237b46606)  },\n      { TO_LIMB_T(0x693c08747876c8f7), TO_LIMB_T(0x22c9850bf9cf80f0),\n        TO_LIMB_T(0x8e9071dab950c124), TO_LIMB_T(0x89bc62d61c7baf23),\n        TO_LIMB_T(0xbc6be2d8dad57c23), TO_LIMB_T(0x17916987aa14a122)  },\n      { TO_LIMB_T(0x1be3ff439c1316fd), TO_LIMB_T(0x9965243a7571dfa7),\n        TO_LIMB_T(0xc7f7f62962f5cd81), TO_LIMB_T(0x32c6aa9af394361c),\n        TO_LIMB_T(0xbbc2ee18e1c227f4), TO_LIMB_T(0x0c102cbac531bb34)  },\n      { TO_LIMB_T(0x997614c97bacbf07), TO_LIMB_T(0x61f86372b99192c0),\n        TO_LIMB_T(0x5b8c95fc14353fc3), TO_LIMB_T(0xca2b066c2a87492f),\n        TO_LIMB_T(0x16178f5bbf698711), TO_LIMB_T(0x12a6dcd7f0f4e0e8)  }\n    };\n    /*\n     * y = y' * y_num / y_den, where\n     * y_num = k_(3,15) * x'^15 + k_(3,14) * x'^14 + k_(3,13) * x'^13 +\n     *         ... 
+ k_(3,0)\n     * ...\n     */\n    static const vec384 isogeny_map_y_num[] = { /*  (k_(3,*)<<384) % P  */\n      { TO_LIMB_T(0x2b567ff3e2837267), TO_LIMB_T(0x1d4d9e57b958a767),\n        TO_LIMB_T(0xce028fea04bd7373), TO_LIMB_T(0xcc31a30a0b6cd3df),\n        TO_LIMB_T(0x7d7b18a682692693), TO_LIMB_T(0x0d300744d42a0310)  },\n      { TO_LIMB_T(0x99c2555fa542493f), TO_LIMB_T(0xfe7f53cc4874f878),\n        TO_LIMB_T(0x5df0608b8f97608a), TO_LIMB_T(0x14e03832052b49c8),\n        TO_LIMB_T(0x706326a6957dd5a4), TO_LIMB_T(0x0a8dadd9c2414555)  },\n      { TO_LIMB_T(0x13d942922a5cf63a), TO_LIMB_T(0x357e33e36e261e7d),\n        TO_LIMB_T(0xcf05a27c8456088d), TO_LIMB_T(0x0000bd1de7ba50f0),\n        TO_LIMB_T(0x83d0c7532f8c1fde), TO_LIMB_T(0x13f70bf38bbf2905)  },\n      { TO_LIMB_T(0x5c57fd95bfafbdbb), TO_LIMB_T(0x28a359a65e541707),\n        TO_LIMB_T(0x3983ceb4f6360b6d), TO_LIMB_T(0xafe19ff6f97e6d53),\n        TO_LIMB_T(0xb3468f4550192bf7), TO_LIMB_T(0x0bb6cde49d8ba257)  },\n      { TO_LIMB_T(0x590b62c7ff8a513f), TO_LIMB_T(0x314b4ce372cacefd),\n        TO_LIMB_T(0x6bef32ce94b8a800), TO_LIMB_T(0x6ddf84a095713d5f),\n        TO_LIMB_T(0x64eace4cb0982191), TO_LIMB_T(0x0386213c651b888d)  },\n      { TO_LIMB_T(0xa5310a31111bbcdd), TO_LIMB_T(0xa14ac0f5da148982),\n        TO_LIMB_T(0xf9ad9cc95423d2e9), TO_LIMB_T(0xaa6ec095283ee4a7),\n        TO_LIMB_T(0xcf5b1f022e1c9107), TO_LIMB_T(0x01fddf5aed881793)  },\n      { TO_LIMB_T(0x65a572b0d7a7d950), TO_LIMB_T(0xe25c2d8183473a19),\n        TO_LIMB_T(0xc2fcebe7cb877dbd), TO_LIMB_T(0x05b2d36c769a89b0),\n        TO_LIMB_T(0xba12961be86e9efb), TO_LIMB_T(0x07eb1b29c1dfde1f)  },\n      { TO_LIMB_T(0x93e09572f7c4cd24), TO_LIMB_T(0x364e929076795091),\n        TO_LIMB_T(0x8569467e68af51b5), TO_LIMB_T(0xa47da89439f5340f),\n        TO_LIMB_T(0xf4fa918082e44d64), TO_LIMB_T(0x0ad52ba3e6695a79)  },\n      { TO_LIMB_T(0x911429844e0d5f54), TO_LIMB_T(0xd03f51a3516bb233),\n        TO_LIMB_T(0x3d587e5640536e66), TO_LIMB_T(0xfa86d2a3a9a73482),\n        
TO_LIMB_T(0xa90ed5adf1ed5537), TO_LIMB_T(0x149c9c326a5e7393)  },\n      { TO_LIMB_T(0x462bbeb03c12921a), TO_LIMB_T(0xdc9af5fa0a274a17),\n        TO_LIMB_T(0x9a558ebde836ebed), TO_LIMB_T(0x649ef8f11a4fae46),\n        TO_LIMB_T(0x8100e1652b3cdc62), TO_LIMB_T(0x1862bd62c291dacb)  },\n      { TO_LIMB_T(0x05c9b8ca89f12c26), TO_LIMB_T(0x0194160fa9b9ac4f),\n        TO_LIMB_T(0x6a643d5a6879fa2c), TO_LIMB_T(0x14665bdd8846e19d),\n        TO_LIMB_T(0xbb1d0d53af3ff6bf), TO_LIMB_T(0x12c7e1c3b28962e5)  },\n      { TO_LIMB_T(0xb55ebf900b8a3e17), TO_LIMB_T(0xfedc77ec1a9201c4),\n        TO_LIMB_T(0x1f07db10ea1a4df4), TO_LIMB_T(0x0dfbd15dc41a594d),\n        TO_LIMB_T(0x389547f2334a5391), TO_LIMB_T(0x02419f98165871a4)  },\n      { TO_LIMB_T(0xb416af000745fc20), TO_LIMB_T(0x8e563e9d1ea6d0f5),\n        TO_LIMB_T(0x7c763e17763a0652), TO_LIMB_T(0x01458ef0159ebbef),\n        TO_LIMB_T(0x8346fe421f96bb13), TO_LIMB_T(0x0d2d7b829ce324d2)  },\n      { TO_LIMB_T(0x93096bb538d64615), TO_LIMB_T(0x6f2a2619951d823a),\n        TO_LIMB_T(0x8f66b3ea59514fa4), TO_LIMB_T(0xf563e63704f7092f),\n        TO_LIMB_T(0x724b136c4cf2d9fa), TO_LIMB_T(0x046959cfcfd0bf49)  },\n      { TO_LIMB_T(0xea748d4b6e405346), TO_LIMB_T(0x91e9079c2c02d58f),\n        TO_LIMB_T(0x41064965946d9b59), TO_LIMB_T(0xa06731f1d2bbe1ee),\n        TO_LIMB_T(0x07f897e267a33f1b), TO_LIMB_T(0x1017290919210e5f)  },\n      { TO_LIMB_T(0x872aa6c17d985097), TO_LIMB_T(0xeecc53161264562a),\n        TO_LIMB_T(0x07afe37afff55002), TO_LIMB_T(0x54759078e5be6838),\n        TO_LIMB_T(0xc4b92d15db8acca8), TO_LIMB_T(0x106d87d1b51d13b9)  }\n    };\n    /* ...\n     * y_den = x'^15 + k_(4,14) * x'^14 + k_(4,13) * x'^13 + ... 
+ k_(4,0)\n     */\n    static const vec384 isogeny_map_y_den[] = { /*  (k_(4,*)<<384) % P  */\n      { TO_LIMB_T(0xeb6c359d47e52b1c), TO_LIMB_T(0x18ef5f8a10634d60),\n        TO_LIMB_T(0xddfa71a0889d5b7e), TO_LIMB_T(0x723e71dcc5fc1323),\n        TO_LIMB_T(0x52f45700b70d5c69), TO_LIMB_T(0x0a8b981ee47691f1)  },\n      { TO_LIMB_T(0x616a3c4f5535b9fb), TO_LIMB_T(0x6f5f037395dbd911),\n        TO_LIMB_T(0xf25f4cc5e35c65da), TO_LIMB_T(0x3e50dffea3c62658),\n        TO_LIMB_T(0x6a33dca523560776), TO_LIMB_T(0x0fadeff77b6bfe3e)  },\n      { TO_LIMB_T(0x2be9b66df470059c), TO_LIMB_T(0x24a2c159a3d36742),\n        TO_LIMB_T(0x115dbe7ad10c2a37), TO_LIMB_T(0xb6634a652ee5884d),\n        TO_LIMB_T(0x04fe8bb2b8d81af4), TO_LIMB_T(0x01c2a7a256fe9c41)  },\n      { TO_LIMB_T(0xf27bf8ef3b75a386), TO_LIMB_T(0x898b367476c9073f),\n        TO_LIMB_T(0x24482e6b8c2f4e5f), TO_LIMB_T(0xc8e0bbd6fe110806),\n        TO_LIMB_T(0x59b0c17f7631448a), TO_LIMB_T(0x11037cd58b3dbfbd)  },\n      { TO_LIMB_T(0x31c7912ea267eec6), TO_LIMB_T(0x1dbf6f1c5fcdb700),\n        TO_LIMB_T(0xd30d4fe3ba86fdb1), TO_LIMB_T(0x3cae528fbee9a2a4),\n        TO_LIMB_T(0xb1cce69b6aa9ad9a), TO_LIMB_T(0x044393bb632d94fb)  },\n      { TO_LIMB_T(0xc66ef6efeeb5c7e8), TO_LIMB_T(0x9824c289dd72bb55),\n        TO_LIMB_T(0x71b1a4d2f119981d), TO_LIMB_T(0x104fc1aafb0919cc),\n        TO_LIMB_T(0x0e49df01d942a628), TO_LIMB_T(0x096c3a09773272d4)  },\n      { TO_LIMB_T(0x9abc11eb5fadeff4), TO_LIMB_T(0x32dca50a885728f0),\n        TO_LIMB_T(0xfb1fa3721569734c), TO_LIMB_T(0xc4b76271ea6506b3),\n        TO_LIMB_T(0xd466a75599ce728e), TO_LIMB_T(0x0c81d4645f4cb6ed)  },\n      { TO_LIMB_T(0x4199f10e5b8be45b), TO_LIMB_T(0xda64e495b1e87930),\n        TO_LIMB_T(0xcb353efe9b33e4ff), TO_LIMB_T(0x9e9efb24aa6424c6),\n        TO_LIMB_T(0xf08d33680a237465), TO_LIMB_T(0x0d3378023e4c7406)  },\n      { TO_LIMB_T(0x7eb4ae92ec74d3a5), TO_LIMB_T(0xc341b4aa9fac3497),\n        TO_LIMB_T(0x5be603899e907687), TO_LIMB_T(0x03bfd9cca75cbdeb),\n        
TO_LIMB_T(0x564c2935a96bfa93), TO_LIMB_T(0x0ef3c33371e2fdb5)  },\n      { TO_LIMB_T(0x7ee91fd449f6ac2e), TO_LIMB_T(0xe5d5bd5cb9357a30),\n        TO_LIMB_T(0x773a8ca5196b1380), TO_LIMB_T(0xd0fda172174ed023),\n        TO_LIMB_T(0x6cb95e0fa776aead), TO_LIMB_T(0x0d22d5a40cec7cff)  },\n      { TO_LIMB_T(0xf727e09285fd8519), TO_LIMB_T(0xdc9d55a83017897b),\n        TO_LIMB_T(0x7549d8bd057894ae), TO_LIMB_T(0x178419613d90d8f8),\n        TO_LIMB_T(0xfce95ebdeb5b490a), TO_LIMB_T(0x0467ffaef23fc49e)  },\n      { TO_LIMB_T(0xc1769e6a7c385f1b), TO_LIMB_T(0x79bc930deac01c03),\n        TO_LIMB_T(0x5461c75a23ede3b5), TO_LIMB_T(0x6e20829e5c230c45),\n        TO_LIMB_T(0x828e0f1e772a53cd), TO_LIMB_T(0x116aefa749127bff)  },\n      { TO_LIMB_T(0x101c10bf2744c10a), TO_LIMB_T(0xbbf18d053a6a3154),\n        TO_LIMB_T(0xa0ecf39ef026f602), TO_LIMB_T(0xfc009d4996dc5153),\n        TO_LIMB_T(0xb9000209d5bd08d3), TO_LIMB_T(0x189e5fe4470cd73c)  },\n      { TO_LIMB_T(0x7ebd546ca1575ed2), TO_LIMB_T(0xe47d5a981d081b55),\n        TO_LIMB_T(0x57b2b625b6d4ca21), TO_LIMB_T(0xb0a1ba04228520cc),\n        TO_LIMB_T(0x98738983c2107ff3), TO_LIMB_T(0x13dddbc4799d81d6)  },\n      { TO_LIMB_T(0x09319f2e39834935), TO_LIMB_T(0x039e952cbdb05c21),\n        TO_LIMB_T(0x55ba77a9a2f76493), TO_LIMB_T(0xfd04e3dfc6086467),\n        TO_LIMB_T(0xfb95832e7d78742e), TO_LIMB_T(0x0ef9c24eccaf5e0e)  }\n    };\n    vec384 Zz_powers[15], map[15], xn, xd, yn, yd;\n\n    /* lay down Z^2 powers in descending order                          */\n    sqr_fp(Zz_powers[14], p->Z);                        /* ZZ^1         */\n#ifdef __OPTIMIZE_SIZE__\n    for (size_t i = 14; i > 0; i--)\n        mul_fp(Zz_powers[i-1], Zz_powers[i], Zz_powers[14]);\n#else\n    sqr_fp(Zz_powers[13], Zz_powers[14]);               /* ZZ^2  1+1    */\n    mul_fp(Zz_powers[12], Zz_powers[14], Zz_powers[13]);/* ZZ^3  2+1    */\n    sqr_fp(Zz_powers[11], Zz_powers[13]);               /* ZZ^4  2+2    */\n    mul_fp(Zz_powers[10], Zz_powers[13], Zz_powers[12]);/* ZZ^5  
2+3    */\n    sqr_fp(Zz_powers[9],  Zz_powers[12]);               /* ZZ^6  3+3    */\n    mul_fp(Zz_powers[8],  Zz_powers[12], Zz_powers[11]);/* ZZ^7  3+4    */\n    sqr_fp(Zz_powers[7],  Zz_powers[11]);               /* ZZ^8  4+4    */\n    mul_fp(Zz_powers[6],  Zz_powers[11], Zz_powers[10]);/* ZZ^9  4+5    */\n    sqr_fp(Zz_powers[5],  Zz_powers[10]);               /* ZZ^10 5+5    */\n    mul_fp(Zz_powers[4],  Zz_powers[10], Zz_powers[9]); /* ZZ^11 5+6    */\n    sqr_fp(Zz_powers[3],  Zz_powers[9]);                /* ZZ^12 6+6    */\n    mul_fp(Zz_powers[2],  Zz_powers[9],  Zz_powers[8]); /* ZZ^13 6+7    */\n    sqr_fp(Zz_powers[1],  Zz_powers[8]);                /* ZZ^14 7+7    */\n    mul_fp(Zz_powers[0],  Zz_powers[8],  Zz_powers[7]); /* ZZ^15 7+8    */\n#endif\n\n    map_fp_times_Zz(map, isogeny_map_x_num, Zz_powers + 4, 11);\n    mul_fp(xn, p->X, isogeny_map_x_num[11]);\n    add_fp(xn, xn, map[10]);\n    map_fp(xn, p->X, map, 10);\n\n    map_fp_times_Zz(map, isogeny_map_x_den, Zz_powers + 5, 10);\n    add_fp(xd, p->X, map[9]);\n    map_fp(xd, p->X, map, 9);\n    mul_fp(xd, xd, Zz_powers[14]);      /* xd *= Z^2                    */\n\n    map_fp_times_Zz(map, isogeny_map_y_num, Zz_powers, 15);\n    mul_fp(yn, p->X, isogeny_map_y_num[15]);\n    add_fp(yn, yn, map[14]);\n    map_fp(yn, p->X, map, 14);\n    mul_fp(yn, yn, p->Y);               /* yn *= Y                      */\n\n    map_fp_times_Zz(map, isogeny_map_y_den, Zz_powers, 15);\n    add_fp(yd, p->X, map[14]);\n    map_fp(yd, p->X, map, 14);\n    mul_fp(Zz_powers[14], Zz_powers[14], p->Z);\n    mul_fp(yd, yd, Zz_powers[14]);      /* yd *= Z^3                    */\n\n    /* convert (xn, xd, yn, yd) to Jacobian coordinates                 */\n    mul_fp(out->Z, xd, yd);             /* Z = xd * yd                  */\n    mul_fp(out->X, xn, yd);\n    mul_fp(out->X, out->X, out->Z);     /* X = xn * xd * yd^2           */\n    sqr_fp(out->Y, out->Z);\n    mul_fp(out->Y, out->Y, xd);\n    mul_fp(out->Y, 
out->Y, yn);         /* Y = yn * xd^3 * yd^2         */\n}\n\nstatic void map_to_isogenous_E1(POINTonE1 *p, const vec384 u)\n{\n    static const vec384 minus_A = { /* P - A */\n        TO_LIMB_T(0x8a9955f1650a005a), TO_LIMB_T(0x9865b3d192cfe93c),\n        TO_LIMB_T(0xaed3ed0f3ef3c441), TO_LIMB_T(0x3c962ef33d92c442),\n        TO_LIMB_T(0x22e438dbd74f94a2), TO_LIMB_T(0x04acbc265478c915)\n    };\n    static const vec384 Z = {       /* (11<<384) % P */\n        TO_LIMB_T(0x886c00000023ffdc), TO_LIMB_T(0x0f70008d3090001d),\n        TO_LIMB_T(0x77672417ed5828c3), TO_LIMB_T(0x9dac23e943dc1740),\n        TO_LIMB_T(0x50553f1b9c131521), TO_LIMB_T(0x078c712fbe0ab6e8)\n    };\n    static const vec384 sqrt_minus_ZZZ = {\n        TO_LIMB_T(0x43b571cad3215f1f), TO_LIMB_T(0xccb460ef1c702dc2),\n        TO_LIMB_T(0x742d884f4f97100b), TO_LIMB_T(0xdb2c3e3238a3382b),\n        TO_LIMB_T(0xe40f3fa13fce8f88), TO_LIMB_T(0x0073a2af9892a2ff)\n    };\n    static const vec384 ZxA = {\n        TO_LIMB_T(0x7f674ea0a8915178), TO_LIMB_T(0xb0f945fc13b8fa65),\n        TO_LIMB_T(0x4b46759a38e87d76), TO_LIMB_T(0x2e7a929641bbb6a1),\n        TO_LIMB_T(0x1668ddfa462bf6b6), TO_LIMB_T(0x00960e2ed1cf294c)\n    };\n    vec384 uu, tv2, x2n, gx1, gxd, y2;\n#if 0\n    vec384 xn, x1n, xd, y, y1, Zuu, tv4;\n#else\n# define xn     p->X\n# define y      p->Y\n# define xd     p->Z\n# define x1n    xn\n# define y1     y\n# define Zuu    x2n\n# define tv4    y1\n#endif\n#define sgn0_fp(a) (sgn0_pty_mont_384((a), BLS12_381_P, p0) & 1)\n    bool_t e1, e2;\n\n    /*\n     * as per map_to_curve() from poc/sswu_opt.sage at\n     * https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve\n     */\n    /* x numerator variants                                             */\n    sqr_fp(uu, u);                      /* uu = u^2                     */\n    mul_fp(Zuu, Z, uu);                 /* Zuu = Z * uu                 */\n    sqr_fp(tv2, Zuu);                   /* tv2 = Zuu^2                  */\n    add_fp(tv2, tv2, Zuu);      
        /* tv2 = tv2 + Zuu              */\n    add_fp(x1n, tv2, BLS12_381_Rx.p);   /* x1n = tv2 + 1                */\n    mul_fp(x1n, x1n, Bprime_E1);        /* x1n = x1n * B                */\n    mul_fp(x2n, Zuu, x1n);              /* x2n = Zuu * x1n              */\n\n    /* x denominator                                                    */\n    mul_fp(xd, minus_A, tv2);           /* xd = -A * tv2                */\n    e1 = vec_is_zero(xd, sizeof(xd));   /* e1 = xd == 0                 */\n    vec_select(xd, ZxA, xd, sizeof(xd), e1);    /*              # If xd == 0, set xd = Z*A */\n\n    /* y numerators variants                                            */\n    sqr_fp(tv2, xd);                    /* tv2 = xd^2                   */\n    mul_fp(gxd, xd, tv2);               /* gxd = xd^3                   */\n    mul_fp(tv2, Aprime_E1, tv2);        /* tv2 = A * tv2                */\n    sqr_fp(gx1, x1n);                   /* gx1 = x1n^2                  */\n    add_fp(gx1, gx1, tv2);              /* gx1 = gx1 + tv2      # x1n^2 + A*xd^2 */\n    mul_fp(gx1, gx1, x1n);              /* gx1 = gx1 * x1n      # x1n^3 + A*x1n*xd^2 */\n    mul_fp(tv2, Bprime_E1, gxd);        /* tv2 = B * gxd                */\n    add_fp(gx1, gx1, tv2);              /* gx1 = gx1 + tv2      # x1^3 + A*x1*xd^2 + B*xd^3 */\n    sqr_fp(tv4, gxd);                   /* tv4 = gxd^2                  */\n    mul_fp(tv2, gx1, gxd);              /* tv2 = gx1 * gxd              */\n    mul_fp(tv4, tv4, tv2);              /* tv4 = tv4 * tv2      # gx1*gxd^3 */\n    e2 = recip_sqrt_fp(y1, tv4);        /* y1 = tv4^c1          # (gx1*gxd^3)^((p-3)/4) */\n    mul_fp(y1, y1, tv2);                /* y1 = y1 * tv2        # gx1*gxd*y1 */\n    mul_fp(y2, y1, sqrt_minus_ZZZ);     /* y2 = y1 * c2         # y2 = y1*sqrt(-Z^3) */\n    mul_fp(y2, y2, uu);                 /* y2 = y2 * uu                 */\n    mul_fp(y2, y2, u);                  /* y2 = y2 * u                  */\n\n    /* choose numerators   
                                             */\n    vec_select(xn, x1n, x2n, sizeof(xn), e2);   /* xn = e2 ? x1n : x2n  */\n    vec_select(y, y1, y2, sizeof(y), e2);       /* y  = e2 ? y1 : y2    */\n\n    e1 = sgn0_fp(u);\n    e2 = sgn0_fp(y);\n    cneg_fp(y, y, e1^e2);               /* fix sign of y                */\n                                        /* return (xn, xd, y, 1)        */\n\n    /* convert (xn, xd, y, 1) to Jacobian projective coordinates        */\n    mul_fp(p->X, xn, xd);               /* X = xn * xd                  */\n    mul_fp(p->Y, y, gxd);               /* Y = y * xd^3                 */\n#ifndef xd\n    vec_copy(p->Z, xd, sizeof(xd));     /* Z = xd                       */\n#else\n# undef xn\n# undef y\n# undef xd\n# undef x1n\n# undef y1\n# undef Zuu\n# undef tv4\n#endif\n#undef sgn0_fp\n}\n\nstatic void POINTonE1_add_n_dbl(POINTonE1 *out, const POINTonE1 *p, size_t n)\n{\n    POINTonE1_dadd(out, out, p, NULL);\n    while(n--)\n        POINTonE1_double(out, out);\n}\n\nstatic void POINTonE1_times_minus_z(POINTonE1 *out, const POINTonE1 *in)\n{\n    POINTonE1_double(out, in);          /*      1: 0x2                  */\n    POINTonE1_add_n_dbl(out, in, 2);    /*   2..4: 0x3..0xc             */\n    POINTonE1_add_n_dbl(out, in, 3);    /*   5..8: 0xd..0x68            */\n    POINTonE1_add_n_dbl(out, in, 9);    /*  9..18: 0x69..0xd200         */\n    POINTonE1_add_n_dbl(out, in, 32);   /* 19..51: ..0xd20100000000     */\n    POINTonE1_add_n_dbl(out, in, 16);   /* 52..68: ..0xd201000000010000 */\n}\n\n/*\n * |u|, |v| are expected to be in Montgomery representation\n */\nstatic void map_to_g1(POINTonE1 *out, const vec384 u, const vec384 v)\n{\n    POINTonE1 p;\n\n    map_to_isogenous_E1(&p, u);\n\n    if (v != NULL) {\n        map_to_isogenous_E1(out, v);    /* borrow |out|                 */\n        POINTonE1_dadd(&p, &p, out, Aprime_E1);\n    }\n\n    isogeny_map_to_E1(&p, &p);          /* sprinkle isogenous powder    */\n\n    /* 
clear the cofactor by multiplying |p| by 1-z, 0xd201000000010001 */\n    POINTonE1_times_minus_z(out, &p);\n    POINTonE1_dadd(out, out, &p, NULL);\n}\n\nvoid blst_map_to_g1(POINTonE1 *out, const vec384 u, const vec384 v)\n{   map_to_g1(out, u, v);   }\n\nstatic void Encode_to_G1(POINTonE1 *p, const unsigned char *msg, size_t msg_len,\n                                       const unsigned char *DST, size_t DST_len,\n                                       const unsigned char *aug, size_t aug_len)\n{\n    vec384 u[1];\n\n    hash_to_field(u, 1, aug, aug_len, msg, msg_len, DST, DST_len);\n    map_to_g1(p, u[0], NULL);\n}\n\nvoid blst_encode_to_g1(POINTonE1 *p, const unsigned char *msg, size_t msg_len,\n                                     const unsigned char *DST, size_t DST_len,\n                                     const unsigned char *aug, size_t aug_len)\n{   Encode_to_G1(p, msg, msg_len, DST, DST_len, aug, aug_len);   }\n\nstatic void Hash_to_G1(POINTonE1 *p, const unsigned char *msg, size_t msg_len,\n                                     const unsigned char *DST, size_t DST_len,\n                                     const unsigned char *aug, size_t aug_len)\n{\n    vec384 u[2];\n\n    hash_to_field(u, 2, aug, aug_len, msg, msg_len, DST, DST_len);\n    map_to_g1(p, u[0], u[1]);\n}\n\nvoid blst_hash_to_g1(POINTonE1 *p, const unsigned char *msg, size_t msg_len,\n                                   const unsigned char *DST, size_t DST_len,\n                                   const unsigned char *aug, size_t aug_len)\n{   Hash_to_G1(p, msg, msg_len, DST, DST_len, aug, aug_len);   }\n\nstatic void sigma(POINTonE1 *out, const POINTonE1 *in);\n\n#if 0\n#ifdef __OPTIMIZE_SIZE__\nstatic void POINTonE1_times_zz_minus_1_div_by_3(POINTonE1 *out,\n                                                const POINTonE1 *in)\n{\n    static const byte zz_minus_1_div_by_3[] = {\n        TO_BYTES(0x0000000055555555ULL), TO_BYTES(0x396c8c005555e156)\n    };\n    size_t n = 126-1;\n    const 
POINTonE1 *dblin = in;\n\n    while(n--) {\n        POINTonE1_double(out, dblin);   dblin = out;\n        if (is_bit_set(zz_minus_1_div_by_3, n))\n            POINTonE1_dadd(out, out, in, NULL);\n    }\n}\n#else\nstatic void POINTonE1_dbl_n_add(POINTonE1 *out, size_t n, const POINTonE1 *p)\n{\n    while(n--)\n        POINTonE1_double(out, out);\n    POINTonE1_dadd(out, out, p, NULL);\n}\n\nstatic void POINTonE1_times_zz_minus_1_div_by_3(POINTonE1 *out,\n                                                const POINTonE1 *in)\n{\n    POINTonE1 t3, t5, t7, t11, t85;\n\n    POINTonE1_double(&t7, in);              /* 2P */\n    POINTonE1_dadd(&t3, &t7, in, NULL);     /* 3P */\n    POINTonE1_dadd(&t5, &t3, &t7, NULL);    /* 5P */\n    POINTonE1_dadd(&t7, &t5, &t7, NULL);    /* 7P */\n    POINTonE1_double(&t85, &t5);            /* 10P */\n    POINTonE1_dadd(&t11, &t85, in, NULL);   /* 11P */\n    POINTonE1_dbl_n_add(&t85, 3, &t5);      /* 0x55P */\n                                            /* (-0xd201000000010000^2 - 1) / 3 */\n    POINTonE1_double(out, &t7);             /* 0xe */\n    POINTonE1_dbl_n_add(out, 5,  &t11);     /* 0x1cb */\n    POINTonE1_dbl_n_add(out, 3,  &t3);      /* 0xe5b */\n    POINTonE1_dbl_n_add(out, 3,  in);       /* 0x72d9 */\n    POINTonE1_dbl_n_add(out, 5,  &t3);      /* 0xe5b23 */\n    POINTonE1_dbl_n_add(out, 18, &t85);     /* 0x396c8c0055 */\n    POINTonE1_dbl_n_add(out, 8,  &t85);     /* 0x396c8c005555 */\n    POINTonE1_dbl_n_add(out, 3,  &t7);      /* 0x1cb646002aaaf */\n    POINTonE1_dbl_n_add(out, 7,  &t5);      /* 0xe5b23001555785 */\n    POINTonE1_dbl_n_add(out, 5,  &t11);     /* 0x1cb646002aaaf0ab */\n    POINTonE1_dbl_n_add(out, 41, &t85);     /* 0x396c8c005555e1560000000055 */\n    POINTonE1_dbl_n_add(out, 8,  &t85);     /* 0x396c8c005555e156000000005555 */\n    POINTonE1_dbl_n_add(out, 8,  &t85);     /* 0x396c8c005555e15600000000555555 */\n    POINTonE1_dbl_n_add(out, 8,  &t85);     /* 0x396c8c005555e1560000000055555555 
*/\n}\n#endif\n\nstatic bool_t POINTonE1_in_G1(const POINTonE1 *P)\n{\n    POINTonE1 t0, t1, t2;\n\n    /* Bowe, S., \"Faster subgroup checks for BLS12-381\"                   */\n    sigma(&t0, P);                        /* σ(P)                         */\n    sigma(&t1, &t0);                      /* σ²(P)                        */\n\n    POINTonE1_double(&t0, &t0);           /* 2σ(P)                        */\n    POINTonE1_dadd(&t2, &t1, P, NULL);    /* P +  σ²(P)                   */\n    POINTonE1_cneg(&t2, 1);               /* - P - σ²(P)                  */\n    POINTonE1_dadd(&t2, &t2, &t0, NULL);  /* 2σ(P) - P - σ²(P)            */\n    POINTonE1_times_zz_minus_1_div_by_3(  &t0, &t2);\n    POINTonE1_cneg(&t1, 1);\n    POINTonE1_dadd(&t0, &t0, &t1, NULL);  /* [(z²-1)/3](2σ(P) - P - σ²(P)) */\n                                          /* - σ²(P) */\n    return vec_is_zero(t0.Z, sizeof(t0.Z));\n}\n#else\nstatic bool_t POINTonE1_in_G1(const POINTonE1 *P)\n{\n    POINTonE1 t0, t1;\n\n    /* Scott, M., https://eprint.iacr.org/2021/1130 */\n    POINTonE1_times_minus_z(&t0, P);\n    POINTonE1_times_minus_z(&t1, &t0);\n    POINTonE1_cneg(&t1, 1);             /* [-z²]P   */\n\n    sigma(&t0, P);                      /* σ(P)     */\n    sigma(&t0, &t0);                    /* σ²(P)    */\n\n    return POINTonE1_is_equal(&t0, &t1);\n}\n#endif\n\nint blst_p1_in_g1(const POINTonE1 *p)\n{   return (int)POINTonE1_in_G1(p);   }\n\nint blst_p1_affine_in_g1(const POINTonE1_affine *p)\n{\n    POINTonE1 P;\n\n    vec_copy(P.X, p->X, 2*sizeof(P.X));\n    vec_select(P.Z, p->X, BLS12_381_Rx.p, sizeof(P.Z),\n                     vec_is_zero(p, sizeof(*p)));\n\n    return (int)POINTonE1_in_G1(&P);\n}\n"
  },
  {
    "path": "src/map_to_g2.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"point.h\"\n#include \"fields.h\"\n\n/*\n * y^2 = x^3 + A'*x + B', isogenous one\n */\nstatic const vec384x Aprime_E2 = {      /* 240*i */\n  { 0 },\n  { TO_LIMB_T(0xe53a000003135242), TO_LIMB_T(0x01080c0fdef80285),\n    TO_LIMB_T(0xe7889edbe340f6bd), TO_LIMB_T(0x0b51375126310601),\n    TO_LIMB_T(0x02d6985717c744ab), TO_LIMB_T(0x1220b4e979ea5467) }\n};\nstatic const vec384x Bprime_E2 = {      /* 1012 + 1012*i */\n  { TO_LIMB_T(0x22ea00000cf89db2), TO_LIMB_T(0x6ec832df71380aa4),\n    TO_LIMB_T(0x6e1b94403db5a66e), TO_LIMB_T(0x75bf3c53a79473ba),\n    TO_LIMB_T(0x3dd3a569412c0a34), TO_LIMB_T(0x125cdb5e74dc4fd1) },\n  { TO_LIMB_T(0x22ea00000cf89db2), TO_LIMB_T(0x6ec832df71380aa4),\n    TO_LIMB_T(0x6e1b94403db5a66e), TO_LIMB_T(0x75bf3c53a79473ba),\n    TO_LIMB_T(0x3dd3a569412c0a34), TO_LIMB_T(0x125cdb5e74dc4fd1) }\n};\n\nstatic void map_fp2_times_Zz(vec384x map[], const vec384x isogeny_map[],\n                             const vec384x Zz_powers[], size_t n)\n{\n    while (n--)\n        mul_fp2(map[n], isogeny_map[n], Zz_powers[n]);\n}\n\nstatic void map_fp2(vec384x acc, const vec384x x, const vec384x map[], size_t n)\n{\n    while (n--) {\n        mul_fp2(acc, acc, x);\n        add_fp2(acc, acc, map[n]);\n    }\n}\n\nstatic void isogeny_map_to_E2(POINTonE2 *out, const POINTonE2 *p)\n{\n    /*\n     * x = x_num / x_den, where\n     * x_num = k_(1,3) * x'^3 + k_(1,2) * x'^2 + k_(1,1) * x' + k_(1,0)\n     * ...\n     */\n    static const vec384x isogeny_map_x_num[] = {    /* (k_(1,*)<<384) % P   */\n     {{ TO_LIMB_T(0x47f671c71ce05e62), TO_LIMB_T(0x06dd57071206393e),\n        TO_LIMB_T(0x7c80cd2af3fd71a2), TO_LIMB_T(0x048103ea9e6cd062),\n        TO_LIMB_T(0xc54516acc8d037f6), TO_LIMB_T(0x13808f550920ea41) },\n      { TO_LIMB_T(0x47f671c71ce05e62), TO_LIMB_T(0x06dd57071206393e),\n        
TO_LIMB_T(0x7c80cd2af3fd71a2), TO_LIMB_T(0x048103ea9e6cd062),\n        TO_LIMB_T(0xc54516acc8d037f6), TO_LIMB_T(0x13808f550920ea41) }},\n     {{ 0 },\n      { TO_LIMB_T(0x5fe55555554c71d0), TO_LIMB_T(0x873fffdd236aaaa3),\n        TO_LIMB_T(0x6a6b4619b26ef918), TO_LIMB_T(0x21c2888408874945),\n        TO_LIMB_T(0x2836cda7028cabc5), TO_LIMB_T(0x0ac73310a7fd5abd) }},\n     {{ TO_LIMB_T(0x0a0c5555555971c3), TO_LIMB_T(0xdb0c00101f9eaaae),\n        TO_LIMB_T(0xb1fb2f941d797997), TO_LIMB_T(0xd3960742ef416e1c),\n        TO_LIMB_T(0xb70040e2c20556f4), TO_LIMB_T(0x149d7861e581393b) },\n      { TO_LIMB_T(0xaff2aaaaaaa638e8), TO_LIMB_T(0x439fffee91b55551),\n        TO_LIMB_T(0xb535a30cd9377c8c), TO_LIMB_T(0x90e144420443a4a2),\n        TO_LIMB_T(0x941b66d3814655e2), TO_LIMB_T(0x0563998853fead5e) }},\n     {{ TO_LIMB_T(0x40aac71c71c725ed), TO_LIMB_T(0x190955557a84e38e),\n        TO_LIMB_T(0xd817050a8f41abc3), TO_LIMB_T(0xd86485d4c87f6fb1),\n        TO_LIMB_T(0x696eb479f885d059), TO_LIMB_T(0x198e1a74328002d2) },\n      { 0 }}\n    };\n    /* ...\n     * x_den = x'^2 + k_(2,1) * x' + k_(2,0)\n     */\n    static const vec384x isogeny_map_x_den[] = {    /* (k_(2,*)<<384) % P   */\n     {{ 0 },\n      { TO_LIMB_T(0x1f3affffff13ab97), TO_LIMB_T(0xf25bfc611da3ff3e),\n        TO_LIMB_T(0xca3757cb3819b208), TO_LIMB_T(0x3e6427366f8cec18),\n        TO_LIMB_T(0x03977bc86095b089), TO_LIMB_T(0x04f69db13f39a952) }},\n     {{ TO_LIMB_T(0x447600000027552e), TO_LIMB_T(0xdcb8009a43480020),\n        TO_LIMB_T(0x6f7ee9ce4a6e8b59), TO_LIMB_T(0xb10330b7c0a95bc6),\n        TO_LIMB_T(0x6140b1fcfb1e54b7), TO_LIMB_T(0x0381be097f0bb4e1) },\n      { TO_LIMB_T(0x7588ffffffd8557d), TO_LIMB_T(0x41f3ff646e0bffdf),\n        TO_LIMB_T(0xf7b1e8d2ac426aca), TO_LIMB_T(0xb3741acd32dbb6f8),\n        TO_LIMB_T(0xe9daf5b9482d581f), TO_LIMB_T(0x167f53e0ba7431b8) }}\n    };\n    /*\n     * y = y' * y_num / y_den, where\n     * y_num = k_(3,3) * x'^3 + k_(3,2) * x'^2 + k_(3,1) * x' + k_(3,0)\n     * ...\n     */\n    
static const vec384x isogeny_map_y_num[] = {    /* (k_(3,*)<<384) % P   */\n     {{ TO_LIMB_T(0x96d8f684bdfc77be), TO_LIMB_T(0xb530e4f43b66d0e2),\n        TO_LIMB_T(0x184a88ff379652fd), TO_LIMB_T(0x57cb23ecfae804e1),\n        TO_LIMB_T(0x0fd2e39eada3eba9), TO_LIMB_T(0x08c8055e31c5d5c3) },\n      { TO_LIMB_T(0x96d8f684bdfc77be), TO_LIMB_T(0xb530e4f43b66d0e2),\n        TO_LIMB_T(0x184a88ff379652fd), TO_LIMB_T(0x57cb23ecfae804e1),\n        TO_LIMB_T(0x0fd2e39eada3eba9), TO_LIMB_T(0x08c8055e31c5d5c3) }},\n     {{ 0 },\n      { TO_LIMB_T(0xbf0a71c71c91b406), TO_LIMB_T(0x4d6d55d28b7638fd),\n        TO_LIMB_T(0x9d82f98e5f205aee), TO_LIMB_T(0xa27aa27b1d1a18d5),\n        TO_LIMB_T(0x02c3b2b2d2938e86), TO_LIMB_T(0x0c7d13420b09807f) }},\n     {{ TO_LIMB_T(0xd7f9555555531c74), TO_LIMB_T(0x21cffff748daaaa8),\n        TO_LIMB_T(0x5a9ad1866c9bbe46), TO_LIMB_T(0x4870a2210221d251),\n        TO_LIMB_T(0x4a0db369c0a32af1), TO_LIMB_T(0x02b1ccc429ff56af) },\n      { TO_LIMB_T(0xe205aaaaaaac8e37), TO_LIMB_T(0xfcdc000768795556),\n        TO_LIMB_T(0x0c96011a8a1537dd), TO_LIMB_T(0x1c06a963f163406e),\n        TO_LIMB_T(0x010df44c82a881e6), TO_LIMB_T(0x174f45260f808feb) }},\n     {{ TO_LIMB_T(0xa470bda12f67f35c), TO_LIMB_T(0xc0fe38e23327b425),\n        TO_LIMB_T(0xc9d3d0f2c6f0678d), TO_LIMB_T(0x1c55c9935b5a982e),\n        TO_LIMB_T(0x27f6c0e2f0746764), TO_LIMB_T(0x117c5e6e28aa9054) },\n      { 0 }}\n    };\n    /* ...\n     * y_den = x'^3 + k_(4,2) * x'^2 + k_(4,1) * x' + k_(4,0)\n     */\n    static const vec384x isogeny_map_y_den[] = {    /* (k_(4,*)<<384) % P   */\n     {{ TO_LIMB_T(0x0162fffffa765adf), TO_LIMB_T(0x8f7bea480083fb75),\n        TO_LIMB_T(0x561b3c2259e93611), TO_LIMB_T(0x11e19fc1a9c875d5),\n        TO_LIMB_T(0xca713efc00367660), TO_LIMB_T(0x03c6a03d41da1151) },\n      { TO_LIMB_T(0x0162fffffa765adf), TO_LIMB_T(0x8f7bea480083fb75),\n        TO_LIMB_T(0x561b3c2259e93611), TO_LIMB_T(0x11e19fc1a9c875d5),\n        TO_LIMB_T(0xca713efc00367660), TO_LIMB_T(0x03c6a03d41da1151) 
}},\n     {{ 0 },\n      { TO_LIMB_T(0x5db0fffffd3b02c5), TO_LIMB_T(0xd713f52358ebfdba),\n        TO_LIMB_T(0x5ea60761a84d161a), TO_LIMB_T(0xbb2c75a34ea6c44a),\n        TO_LIMB_T(0x0ac6735921c1119b), TO_LIMB_T(0x0ee3d913bdacfbf6) }},\n     {{ TO_LIMB_T(0x66b10000003affc5), TO_LIMB_T(0xcb1400e764ec0030),\n        TO_LIMB_T(0xa73e5eb56fa5d106), TO_LIMB_T(0x8984c913a0fe09a9),\n        TO_LIMB_T(0x11e10afb78ad7f13), TO_LIMB_T(0x05429d0e3e918f52) },\n      { TO_LIMB_T(0x534dffffffc4aae6), TO_LIMB_T(0x5397ff174c67ffcf),\n        TO_LIMB_T(0xbff273eb870b251d), TO_LIMB_T(0xdaf2827152870915),\n        TO_LIMB_T(0x393a9cbaca9e2dc3), TO_LIMB_T(0x14be74dbfaee5748) }}\n    };\n    vec384x Zz_powers[3], map[3], xn, xd, yn, yd;\n\n    /* lay down Z^2 powers in descending order                          */\n    sqr_fp2(Zz_powers[2], p->Z);                       /* ZZ^1          */\n    sqr_fp2(Zz_powers[1], Zz_powers[2]);               /* ZZ^2  1+1     */\n    mul_fp2(Zz_powers[0], Zz_powers[2], Zz_powers[1]); /* ZZ^3  2+1     */\n\n    map_fp2_times_Zz(map, isogeny_map_x_num, Zz_powers, 3);\n    mul_fp2(xn, p->X, isogeny_map_x_num[3]);\n    add_fp2(xn, xn, map[2]);\n    map_fp2(xn, p->X, map, 2);\n\n    map_fp2_times_Zz(map, isogeny_map_x_den, Zz_powers + 1, 2);\n    add_fp2(xd, p->X, map[1]);\n    map_fp2(xd, p->X, map, 1);\n    mul_fp2(xd, xd, Zz_powers[2]);      /* xd *= Z^2                    */\n\n    map_fp2_times_Zz(map, isogeny_map_y_num, Zz_powers, 3);\n    mul_fp2(yn, p->X, isogeny_map_y_num[3]);\n    add_fp2(yn, yn, map[2]);\n    map_fp2(yn, p->X, map, 2);\n    mul_fp2(yn, yn, p->Y);              /* yn *= Y                      */\n\n    map_fp2_times_Zz(map, isogeny_map_y_den, Zz_powers, 3);\n    add_fp2(yd, p->X, map[2]);\n    map_fp2(yd, p->X, map, 2);\n    mul_fp2(Zz_powers[2], Zz_powers[2], p->Z);\n    mul_fp2(yd, yd, Zz_powers[2]);      /* yd *= Z^3                    */\n\n    /* convert (xn, xd, yn, yd) to Jacobian coordinates                 */\n    
mul_fp2(out->Z, xd, yd);            /* Z = xd * yd                  */\n    mul_fp2(out->X, xn, yd);\n    mul_fp2(out->X, out->X, out->Z);    /* X = xn * xd * yd^2           */\n    sqr_fp2(out->Y, out->Z);\n    mul_fp2(out->Y, out->Y, xd);\n    mul_fp2(out->Y, out->Y, yn);        /* Y = yn * xd^3 * yd^2         */\n}\n\nstatic void map_to_isogenous_E2(POINTonE2 *p, const vec384x u)\n{\n    static const vec384x minus_A = {\n      { 0 },\n      { TO_LIMB_T(0xd4c4fffffcec5869), TO_LIMB_T(0x1da3f3eed25bfd79),\n        TO_LIMB_T(0x7fa833c5136fff67), TO_LIMB_T(0x59261433cd540cbd),\n        TO_LIMB_T(0x48450f5f2b84682c), TO_LIMB_T(0x07e05d00bf959233) }\n    };\n    static const vec384x Z = {              /* -2 - i */\n      { TO_LIMB_T(0x87ebfffffff9555c), TO_LIMB_T(0x656fffe5da8ffffa),\n        TO_LIMB_T(0x0fd0749345d33ad2), TO_LIMB_T(0xd951e663066576f4),\n        TO_LIMB_T(0xde291a3d41e980d3), TO_LIMB_T(0x0815664c7dfe040d) },\n      { TO_LIMB_T(0x43f5fffffffcaaae), TO_LIMB_T(0x32b7fff2ed47fffd),\n        TO_LIMB_T(0x07e83a49a2e99d69), TO_LIMB_T(0xeca8f3318332bb7a),\n        TO_LIMB_T(0xef148d1ea0f4c069), TO_LIMB_T(0x040ab3263eff0206) }\n    };\n    static const vec384x recip_ZZZ = {      /* 1/(Z^3) */\n      { TO_LIMB_T(0x65018f5c28f598eb), TO_LIMB_T(0xe6020417f022d916),\n        TO_LIMB_T(0xd6327313288369c7), TO_LIMB_T(0x622ded8eb447156f),\n        TO_LIMB_T(0xe52a2aee72c2a01f), TO_LIMB_T(0x089812fb8481ffe4) },\n      { TO_LIMB_T(0x2574eb851eb8619f), TO_LIMB_T(0xdba2e97912925604),\n        TO_LIMB_T(0x67e495a909e7a18e), TO_LIMB_T(0xdf2da23b8145b8f7),\n        TO_LIMB_T(0xcf5d3728310ebf6d), TO_LIMB_T(0x11be446236f4c116) }\n    };\n    static const vec384x magic_ZZZ = {      /* 1/Z^3 = a + b*i */\n                                            /* a^2 + b^2 */\n      { TO_LIMB_T(0xaa7eb851eb8508e0), TO_LIMB_T(0x1c54fdf360989374),\n        TO_LIMB_T(0xc87f2fc6e716c62e), TO_LIMB_T(0x0124aefb1f9efea7),\n        TO_LIMB_T(0xb2f8be63e844865c), TO_LIMB_T(0x08b47f775a7ef35a) },\n  
                                          /* (a^2 + b^2)^((P-3)/4) */\n      { TO_LIMB_T(0xe4132bbd838cf70a), TO_LIMB_T(0x01d769ac83772c19),\n        TO_LIMB_T(0xa83dd6e974c22e45), TO_LIMB_T(0xbc8ec3e777b08dff),\n        TO_LIMB_T(0xc035c2042ecf5da3), TO_LIMB_T(0x073929e97f0850bf) }\n    };\n    static const vec384x ZxA = {            /* 240 - 480*i */\n      { TO_LIMB_T(0xe53a000003135242), TO_LIMB_T(0x01080c0fdef80285),\n        TO_LIMB_T(0xe7889edbe340f6bd), TO_LIMB_T(0x0b51375126310601),\n        TO_LIMB_T(0x02d6985717c744ab), TO_LIMB_T(0x1220b4e979ea5467) },\n      { TO_LIMB_T(0xa989fffff9d8b0d2), TO_LIMB_T(0x3b47e7dda4b7faf3),\n        TO_LIMB_T(0xff50678a26dffece), TO_LIMB_T(0xb24c28679aa8197a),\n        TO_LIMB_T(0x908a1ebe5708d058), TO_LIMB_T(0x0fc0ba017f2b2466) }\n    };\n    vec384x uu, tv2, tv4, x2n, gx1, gxd, y2;\n#if 0\n    vec384x xn, x1n, xd, y, y1, Zuu;\n#else\n# define xn     p->X\n# define y      p->Y\n# define xd     p->Z\n# define x1n    xn\n# define y1     y\n# define Zuu    x2n\n#endif\n#define sgn0_fp2(a) (sgn0_pty_mont_384x((a), BLS12_381_P, p0) & 1)\n    bool_t e1, e2;\n\n    /*\n     * as per map_to_curve() from poc/sswu_opt.sage at\n     * https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve\n     * with 9mod16 twists...\n     */\n    /* x numerator variants                                             */\n    sqr_fp2(uu, u);                     /* uu = u^2                     */\n    mul_fp2(Zuu, Z, uu);                /* Zuu = Z * uu                 */\n    sqr_fp2(tv2, Zuu);                  /* tv2 = Zuu^2                  */\n    add_fp2(tv2, tv2, Zuu);             /* tv2 = tv2 + Zuu              */\n    add_fp2(x1n, tv2, BLS12_381_Rx.p2); /* x1n = tv2 + 1                */\n    mul_fp2(x1n, x1n, Bprime_E2);       /* x1n = x1n * B                */\n    mul_fp2(x2n, Zuu, x1n);             /* x2n = Zuu * x1n              */\n\n    /* x denominator                                                    */\n    mul_fp2(xd, minus_A, tv2);    
      /* xd = -A * tv2                */\n    e1 = vec_is_zero(xd, sizeof(xd));   /* e1 = xd == 0                 */\n    vec_select(xd, ZxA, xd, sizeof(xd), e1);    /*              # If xd == 0, set xd = Z*A */\n\n    /* y numerators variants                                            */\n    sqr_fp2(tv2, xd);                   /* tv2 = xd^2                   */\n    mul_fp2(gxd, xd, tv2);              /* gxd = xd^3                   */\n    mul_fp2(tv2, Aprime_E2, tv2);       /* tv2 = A * tv2                */\n    sqr_fp2(gx1, x1n);                  /* gx1 = x1n^2                  */\n    add_fp2(gx1, gx1, tv2);             /* gx1 = gx1 + tv2      # x1n^2 + A*xd^2 */\n    mul_fp2(gx1, gx1, x1n);             /* gx1 = gx1 * x1n      # x1n^3 + A*x1n*xd^2 */\n    mul_fp2(tv2, Bprime_E2, gxd);       /* tv2 = B * gxd                */\n    add_fp2(gx1, gx1, tv2);             /* gx1 = gx1 + tv2      # x1^3 + A*x1*xd^2 + B*xd^3 */\n    sqr_fp2(tv4, gxd);                  /* tv4 = gxd^2                  */\n    mul_fp2(tv2, gx1, gxd);             /* tv2 = gx1 * gxd              */\n    mul_fp2(tv4, tv4, tv2);             /* tv4 = tv4 * tv2      # gx1*gxd^3 */\n    e2 = recip_sqrt_fp2(y1, tv4,        /* y1 = tv4^c1          # (gx1*gxd^3)^((p^2-9)/16) */\n                        recip_ZZZ, magic_ZZZ);\n    mul_fp2(y1, y1, tv2);               /* y1 = y1 * tv2        # gx1*gxd*y1 */\n    mul_fp2(y2, y1, uu);                /* y2 = y1 * uu                 */\n    mul_fp2(y2, y2, u);                 /* y2 = y2 * u                  */\n\n    /* choose numerators                                                */\n    vec_select(xn, x1n, x2n, sizeof(xn), e2);   /* xn = e2 ? x1n : x2n  */\n    vec_select(y, y1, y2, sizeof(y), e2);       /* y  = e2 ? 
y1 : y2    */\n\n    e1 = sgn0_fp2(u);\n    e2 = sgn0_fp2(y);\n    cneg_fp2(y, y, e1^e2);              /* fix sign of y                */\n                                        /* return (xn, xd, y, 1)        */\n\n    /* convert (xn, xd, y, 1) to Jacobian projective coordinates        */\n    mul_fp2(p->X, xn, xd);              /* X = xn * xd                  */\n    mul_fp2(p->Y, y, gxd);              /* Y = y * xd^3                 */\n#ifndef xd\n    vec_copy(p->Z, xd, sizeof(xd));     /* Z = xd                       */\n#else\n# undef xn\n# undef y\n# undef xd\n# undef x1n\n# undef y1\n# undef Zuu\n# undef tv4\n#endif\n#undef sgn0_fp2\n}\n\n#if 0\nstatic const byte h_eff[] = {\n    TO_BYTES(0xe8020005aaa95551), TO_BYTES(0x59894c0adebbf6b4),\n    TO_BYTES(0xe954cbc06689f6a3), TO_BYTES(0x2ec0ec69d7477c1a),\n    TO_BYTES(0x6d82bf015d1212b0), TO_BYTES(0x329c2f178731db95),\n    TO_BYTES(0x9986ff031508ffe1), TO_BYTES(0x88e2a8e9145ad768),\n    TO_BYTES(0x584c6a0ea91b3528), TO_BYTES(0x0bc69f08f2ee75b3)\n};\n\nstatic void clear_cofactor(POINTonE2 *out, const POINTonE2 *p)\n{    POINTonE2_mult_w5(out, p, h_eff, 636);   }\n#else\n/*\n * As per suggestions in \"7. 
Clearing the cofactor\" at\n * https://tools.ietf.org/html/draft-irtf-cfrg-hash-to-curve-06\n */\nstatic void POINTonE2_add_n_dbl(POINTonE2 *out, const POINTonE2 *p, size_t n)\n{\n    POINTonE2_dadd(out, out, p, NULL);\n    while(n--)\n        POINTonE2_double(out, out);\n}\n\nstatic void POINTonE2_times_minus_z(POINTonE2 *out, const POINTonE2 *in)\n{\n    POINTonE2_double(out, in);          /*      1: 0x2                  */\n    POINTonE2_add_n_dbl(out, in, 2);    /*   2..4: 0x3..0xc             */\n    POINTonE2_add_n_dbl(out, in, 3);    /*   5..8: 0xd..0x68            */\n    POINTonE2_add_n_dbl(out, in, 9);    /*  9..18: 0x69..0xd200         */\n    POINTonE2_add_n_dbl(out, in, 32);   /* 19..51: ..0xd20100000000     */\n    POINTonE2_add_n_dbl(out, in, 16);   /* 52..68: ..0xd201000000010000 */\n}\n\nstatic void psi(POINTonE2 *out, const POINTonE2 *in);\n\nstatic void clear_cofactor(POINTonE2 *out, const POINTonE2 *p)\n{\n    POINTonE2 t0, t1;\n\n    /* A.Budroni, F.Pintore, \"Efficient hash maps to G2 on BLS curves\"  */\n    POINTonE2_double(out, p);           /* out = 2P                     */\n    psi(out, out);                      /* out = Ψ(2P)                  */\n    psi(out, out);                      /* out = Ψ²(2P)                 */\n\n    vec_copy(&t0, p, sizeof(t0));\n    POINTonE2_cneg(&t0, 1);             /* t0 = -P                      */\n    psi(&t1, &t0);                      /* t1 = -Ψ(P)                   */\n    POINTonE2_dadd(out, out, &t0, NULL);/* out = Ψ²(2P) - P             */\n    POINTonE2_dadd(out, out, &t1, NULL);/* out = Ψ²(2P) - P - Ψ(P)      */\n\n    POINTonE2_times_minus_z(&t0, p);    /* t0 = [-z]P                   */\n    POINTonE2_dadd(&t0, &t0, p, NULL);  /* t0 = [-z + 1]P               */\n    POINTonE2_dadd(&t0, &t0, &t1, NULL);/* t0 = [-z + 1]P - Ψ(P)        */\n    POINTonE2_times_minus_z(&t1, &t0);  /* t1 = [z² - z]P + [z]Ψ(P)     */\n    POINTonE2_dadd(out, out, &t1, NULL);/* out = [z² - z - 1]P          */\n      
                                  /*     + [z - 1]Ψ(P)            */\n                                        /*     + Ψ²(2P)                 */\n}\n#endif\n\n/*\n * |u|, |v| are expected to be in Montgomery representation\n */\nstatic void map_to_g2(POINTonE2 *out, const vec384x u, const vec384x v)\n{\n    POINTonE2 p;\n\n    map_to_isogenous_E2(&p, u);\n\n    if (v != NULL) {\n        map_to_isogenous_E2(out, v);    /* borrow |out|                 */\n        POINTonE2_dadd(&p, &p, out, Aprime_E2);\n    }\n\n    isogeny_map_to_E2(&p, &p);          /* sprinkle isogenous powder    */\n    clear_cofactor(out, &p);\n}\n\nvoid blst_map_to_g2(POINTonE2 *out, const vec384x u, const vec384x v)\n{   map_to_g2(out, u, v);   }\n\nstatic void Encode_to_G2(POINTonE2 *p, const unsigned char *msg, size_t msg_len,\n                                       const unsigned char *DST, size_t DST_len,\n                                       const unsigned char *aug, size_t aug_len)\n{\n    vec384x u[1];\n\n    hash_to_field(u[0], 2, aug, aug_len, msg, msg_len, DST, DST_len);\n    map_to_g2(p, u[0], NULL);\n}\n\nvoid blst_encode_to_g2(POINTonE2 *p, const unsigned char *msg, size_t msg_len,\n                                     const unsigned char *DST, size_t DST_len,\n                                     const unsigned char *aug, size_t aug_len)\n{   Encode_to_G2(p, msg, msg_len, DST, DST_len, aug, aug_len);   }\n\nstatic void Hash_to_G2(POINTonE2 *p, const unsigned char *msg, size_t msg_len,\n                                     const unsigned char *DST, size_t DST_len,\n                                     const unsigned char *aug, size_t aug_len)\n{\n    vec384x u[2];\n\n    hash_to_field(u[0], 4, aug, aug_len, msg, msg_len, DST, DST_len);\n    map_to_g2(p, u[0], u[1]);\n}\n\nvoid blst_hash_to_g2(POINTonE2 *p, const unsigned char *msg, size_t msg_len,\n                                   const unsigned char *DST, size_t DST_len,\n                                   const unsigned char 
*aug, size_t aug_len)\n{   Hash_to_G2(p, msg, msg_len, DST, DST_len, aug, aug_len);   }\n\nstatic bool_t POINTonE2_in_G2(const POINTonE2 *P)\n{\n#if 0\n    POINTonE2 t0, t1, t2;\n\n    /* Bowe, S., \"Faster subgroup checks for BLS12-381\"                 */\n    psi(&t0, P);                        /* Ψ(P)                         */\n    psi(&t0, &t0);                      /* Ψ²(P)                        */\n    psi(&t1, &t0);                      /* Ψ³(P)                        */\n\n    POINTonE2_times_minus_z(&t2, &t1);\n    POINTonE2_dadd(&t0, &t0, &t2, NULL);\n    POINTonE2_cneg(&t0, 1);\n    POINTonE2_dadd(&t0, &t0, P, NULL);  /* [z]Ψ³(P) - Ψ²(P) + P         */\n\n    return vec_is_zero(t0.Z, sizeof(t0.Z));\n#else\n    POINTonE2 t0, t1;\n\n    /* Scott, M., https://eprint.iacr.org/2021/1130 */\n    psi(&t0, P);                            /* Ψ(P) */\n\n    POINTonE2_times_minus_z(&t1, P);\n    POINTonE2_cneg(&t1, 1);                 /* [z]P */\n\n    return POINTonE2_is_equal(&t0, &t1);\n#endif\n}\n\nint blst_p2_in_g2(const POINTonE2 *p)\n{   return (int)POINTonE2_in_G2(p);   }\n\nint blst_p2_affine_in_g2(const POINTonE2_affine *p)\n{\n    POINTonE2 P;\n\n    vec_copy(P.X, p->X, 2*sizeof(P.X));\n    vec_select(P.Z, p->X, BLS12_381_Rx.p, sizeof(P.Z),\n                     vec_is_zero(p, sizeof(*p)));\n\n    return (int)POINTonE2_in_G2(&P);\n}\n"
  },
  {
    "path": "src/multi_scalar.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n#include \"point.h\"\n\n#define POINTS_TO_AFFINE_IMPL(prefix, ptype, bits, field) \\\nstatic void ptype##s_to_affine(ptype##_affine dst[], \\\n                               const ptype *const points[], size_t npoints) \\\n{ \\\n    size_t i; \\\n    vec##bits *acc, ZZ, ZZZ; \\\n    const ptype *point = NULL; \\\n    const size_t stride = sizeof(ptype)==sizeof(POINTonE1) ? 1536 : 768; \\\n\\\n    while (npoints) { \\\n        const ptype *p, *const *walkback; \\\n        size_t delta = stride<npoints ? stride : npoints; \\\n\\\n        point = *points ? *points++ : point+1; \\\n        acc = (vec##bits *)dst; \\\n        vec_select(acc++, BLS12_381_Rx.p, point->Z, sizeof(vec##bits), \\\n                          vec_is_zero(point->Z, sizeof(point->Z))); \\\n        for (i = 1; i < delta; i++, acc++) { \\\n            point = *points ? 
*points++ : point+1; \\\n            vec_select(acc[0], BLS12_381_Rx.p, point->Z, sizeof(vec##bits), \\\n                               vec_is_zero(point->Z, sizeof(point->Z))); \\\n            mul_##field(acc[0], acc[0], acc[-1]); \\\n        } \\\n\\\n        --acc; reciprocal_##field(acc[0], acc[0]); \\\n\\\n        walkback = points-1, p = point, --delta, dst += delta; \\\n        for (i = 0; i < delta; i++, acc--, dst--) { \\\n            bool_t is_inf = vec_is_zero(p->Z, sizeof(p->Z)); \\\n            mul_##field(acc[-1], acc[-1], acc[0]);  /* 1/Z        */\\\n            sqr_##field(ZZ, acc[-1]);               /* 1/Z^2      */\\\n            mul_##field(ZZZ, ZZ, acc[-1]);          /* 1/Z^3      */\\\n            vec_select(acc[-1], BLS12_381_Rx.p, p->Z, sizeof(vec##bits), \\\n                                is_inf); \\\n            mul_##field(acc[-1], acc[-1], acc[0]);  \\\n            mul_##field(dst->X,  p->X, ZZ);         /* X = X'/Z^2 */\\\n            mul_##field(dst->Y,  p->Y, ZZZ);        /* Y = Y'/Z^3 */\\\n            vec_czero(dst, sizeof(*dst), is_inf); \\\n            p = (p == *walkback) ? *--walkback : p-1; \\\n        } \\\n        sqr_##field(ZZ, acc[0]);                    /* 1/Z^2      */\\\n        mul_##field(ZZZ, ZZ, acc[0]);               /* 1/Z^3      */\\\n        mul_##field(dst->X, p->X, ZZ);              /* X = X'/Z^2 */\\\n        mul_##field(dst->Y, p->Y, ZZZ);             /* Y = Y'/Z^3 */\\\n        vec_czero(dst, sizeof(*dst), vec_is_zero(p->Z, sizeof(p->Z))); \\\n        ++delta, dst += delta, npoints -= delta; \\\n    } \\\n} \\\n\\\nvoid prefix##s_to_affine(ptype##_affine dst[], const ptype *const points[], \\\n                         size_t npoints) \\\n{   ptype##s_to_affine(dst, points, npoints);   }\n\nPOINTS_TO_AFFINE_IMPL(blst_p1, POINTonE1, 384, fp)\nPOINTS_TO_AFFINE_IMPL(blst_p2, POINTonE2, 384x, fp2)\n\n/*\n * This is two-step multi-scalar multiplication procedure. 
First, given\n * a set of points you pre-compute a table for chosen windowing factor\n * [expressed in bits with value between 2 and 14], and then you pass\n * this table to the actual multiplication procedure along with scalars.\n * Idea is that the pre-computed table will be reused multiple times. In\n * which case multiplication runs faster than below Pippenger algorithm\n * implementation for up to ~16K points for wbits=8, naturally at the\n * expense of multi-megabyte table. One can trade even more memory for\n * performance, but each wbits increment doubles the memory requirement,\n * so at some point it gets prohibitively large... For reference, without\n * reusing the table it's faster than Pippenger algorithm for up to ~32\n * points [with wbits=5]...\n */\n\n#define SCRATCH_SZ(ptype) (sizeof(ptype)==sizeof(POINTonE1) ? 8192 : 4096)\n\n/* The intermediate infinity points are encoded as [0, 0, 1]. */\n\n#define PRECOMPUTE_WBITS_IMPL(prefix, ptype, bits, field, one) \\\nstatic void ptype##_precompute_row(ptype row[], size_t n, \\\n                                   const ptype##_affine *point) \\\n{ \\\n    size_t i, j; \\\n    bool_t inf = vec_is_zero(point, sizeof(*point)); \\\n                                          /* row[-1] is implicit infinity */\\\n    vec_copy(&row[0], point, sizeof(*point));           /* row[0]=p*1     */\\\n    vec_copy(&row[0].Z, one, sizeof(row[0].Z));                             \\\n    ptype##_double(&row[1],  &row[0]);                  /* row[1]=p*(1+1) */\\\n    vec_select(&row[1].Z, one, &row[1].Z, sizeof(row[1].Z), inf);           \\\n    for (i = 2, j = 1; i < n; i += 2, j++) \\\n        ptype##_add_affine(&row[i], &row[i-1], point),  /* row[2]=p*(2+1) */\\\n        ptype##_double(&row[i+1], &row[j]),             /* row[3]=p*(2+2) */\\\n        vec_select(&row[i+1].Z, one, &row[i+1].Z, sizeof(row[i+1].Z), inf); \\\n}                                                       /* row[4] ...     
*/\\\n\\\nstatic void ptype##s_to_affine_row_wbits(ptype##_affine dst[], ptype src[], \\\n                                         size_t wbits, size_t npoints) \\\n{ \\\n    size_t total = npoints << (wbits-1); \\\n    size_t nwin = (size_t)1 << (wbits-1); \\\n    size_t i, j; \\\n    vec##bits *acc, ZZ, ZZZ; \\\n\\\n    src += total; \\\n    acc = (vec##bits *)src; \\\n    vec_copy(acc++, one, sizeof(vec##bits)); \\\n    for (i = 0; i < npoints; i++) \\\n        for (j = nwin; --src, --j; acc++)    \\\n            mul_##field(acc[0], acc[-1], src->Z); \\\n\\\n    --acc; reciprocal_##field(acc[0], acc[0]); \\\n\\\n    for (i = 0; i < npoints; i++) { \\\n        vec_copy(dst++, src++, sizeof(ptype##_affine)); \\\n        for (j = 1; j < nwin; j++, acc--, src++, dst++) { \\\n            mul_##field(acc[-1], acc[-1], acc[0]);  /* 1/Z        */\\\n            sqr_##field(ZZ, acc[-1]);               /* 1/Z^2      */\\\n            mul_##field(ZZZ, ZZ, acc[-1]);          /* 1/Z^3      */\\\n            mul_##field(acc[-1], src->Z, acc[0]);                   \\\n            mul_##field(dst->X, src->X, ZZ);        /* X = X'/Z^2 */\\\n            mul_##field(dst->Y, src->Y, ZZZ);       /* Y = Y'/Z^3 */\\\n        } \\\n    } \\\n} \\\n\\\n/* flat |points[n]| can be placed at the end of |table[n<<(wbits-1)]| */\\\nstatic void ptype##s_precompute_wbits(ptype##_affine table[], size_t wbits, \\\n                                      const ptype##_affine *const points[], \\\n                                      size_t npoints) \\\n{ \\\n    size_t total = npoints << (wbits-1); \\\n    size_t nwin = (size_t)1 << (wbits-1); \\\n    size_t nmin = wbits>9 ? 
(size_t)1: (size_t)1 << (9-wbits); \\\n    size_t i, top = 0; \\\n    ptype *rows, *row; \\\n    const ptype##_affine *point = NULL; \\\n    size_t stride = ((512*1024)/sizeof(ptype##_affine)) >> wbits; \\\n    if (stride == 0) stride = 1; \\\n\\\n    while (npoints >= nmin) { \\\n        size_t limit = total - npoints; \\\n\\\n        if (top + (stride << wbits) > limit) { \\\n            stride = (limit - top) >> wbits;   \\\n            if (stride == 0) break;            \\\n        } \\\n        rows = row = (ptype *)(&table[top]); \\\n        for (i = 0; i < stride; i++, row += nwin) \\\n            point = *points ? *points++ : point+1, \\\n            ptype##_precompute_row(row, nwin, point); \\\n        ptype##s_to_affine_row_wbits(&table[top], rows, wbits, stride); \\\n        top += stride << (wbits-1); \\\n        npoints -= stride; \\\n    } \\\n    if ((i = 2*sizeof(ptype##_affine)*npoints*nwin) <= SCRATCH_LIMIT) { \\\n        rows = row = alloca(i); \\\n        for (i = 0; i < npoints; i++, row += nwin) \\\n            point = *points ? *points++ : point+1, \\\n            ptype##_precompute_row(row, nwin, point); \\\n        ptype##s_to_affine_row_wbits(&table[top], rows, wbits, npoints); \\\n    } else { \\\n        const ptype *pp[2]; \\\n\\\n        stride = SCRATCH_LIMIT / sizeof(ptype); \\\n        stride -= stride % 2; \\\n        if (stride > nwin) stride = nwin; \\\n\\\n        pp[0] = row = alloca(stride * sizeof(ptype)); \\\n        pp[1] = NULL; \\\n        for (i = 0; i < npoints; i++, top += nwin) { \\\n            size_t j, k, n; \\\n\\\n            point = *points ? *points++ : point+1; \\\n            ptype##_precompute_row(row, stride, point); \\\n            ptype##s_to_affine(&table[top], pp, stride); \\\n            for (j = stride; j < nwin; j += stride) { \\\n                n = (j+stride) <= nwin ? 
stride : nwin-j; \\\n                for (k = 0; k < n-1; k++) \\\n                    ptype##_add_affine(&row[k], &row[stride-1], &table[top+k]); \\\n                if (j == stride) \\\n                    ptype##_double(&row[k], &row[stride-1]); \\\n                else \\\n                    ptype##_add_affine(&row[k], &row[stride-1], &table[top+k]); \\\n                ptype##s_to_affine(&table[top+j], pp, n); \\\n            } \\\n        } \\\n    } \\\n} \\\n\\\nsize_t prefix##s_mult_wbits_precompute_sizeof(size_t wbits, size_t npoints) \\\n{ return (sizeof(ptype##_affine)*npoints) << (wbits-1); } \\\nvoid prefix##s_mult_wbits_precompute(ptype##_affine table[], size_t wbits, \\\n                                     const ptype##_affine *const points[], \\\n                                     size_t npoints) \\\n{ ptype##s_precompute_wbits(table, wbits, points, npoints); }\n\n#define POINTS_MULT_WBITS_IMPL(prefix, ptype, bits, field, one) \\\nstatic void ptype##_gather_booth_wbits(ptype *p, const ptype##_affine row[], \\\n                                       size_t wbits, limb_t booth_idx) \\\n{ \\\n    bool_t booth_sign = (booth_idx >> wbits) & 1; \\\n    bool_t idx_is_zero; \\\n    static const ptype##_affine infinity = { 0 }; \\\n\\\n    booth_idx &= ((limb_t)1 << wbits) - 1; \\\n    idx_is_zero = is_zero(booth_idx); \\\n    booth_idx -= 1 ^ idx_is_zero; \\\n    vec_select(p, &infinity, &row[booth_idx], sizeof(row[0]), idx_is_zero); \\\n    ptype##_cneg(p, booth_sign); \\\n} \\\n\\\nstatic void ptype##s_mult_wbits(ptype *ret, const ptype##_affine table[], \\\n                                size_t wbits, size_t npoints, \\\n                                const byte *const scalars[], size_t nbits, \\\n                                ptype scratch[]) \\\n{ \\\n    limb_t wmask, wval; \\\n    size_t i, j, z, nbytes, window, nwin = (size_t)1 << (wbits-1); \\\n    const byte *scalar, *const *scalar_s = scalars; \\\n    const ptype##_affine *row = table; 
\\\n\\\n    size_t scratch_sz = SCRATCH_SZ(ptype); \\\n    if (scratch == NULL) { \\\n        scratch_sz /= 4; /* limit to 288K */ \\\n        scratch_sz = scratch_sz < npoints ? scratch_sz : npoints; \\\n        scratch = alloca(sizeof(ptype) * scratch_sz); \\\n    } \\\n\\\n    nbytes = (nbits + 7)/8; /* convert |nbits| to bytes */ \\\n    scalar = *scalar_s++; \\\n\\\n    /* top excess bits modulo target window size */ \\\n    window = nbits % wbits; /* yes, it may be zero */ \\\n    wmask = ((limb_t)1 << (window + 1)) - 1; \\\n\\\n    nbits -= window; \\\n    z = is_zero(nbits); \\\n    wval = (get_wval_limb(scalar, nbits - (z^1), window + (z^1)) << z) & wmask; \\\n    wval = booth_encode(wval, wbits); \\\n    ptype##_gather_booth_wbits(&scratch[0], row, wbits, wval); \\\n    row += nwin; \\\n\\\n    i = 1; vec_zero(ret, sizeof(*ret)); \\\n    while (nbits > 0) { \\\n        for (j = i; i < npoints; i++, j++, row += nwin) { \\\n            if (j == scratch_sz) \\\n                ptype##s_accumulate(ret, scratch, j), j = 0; \\\n            scalar = *scalar_s ? *scalar_s++ : scalar+nbytes; \\\n            wval = get_wval_limb(scalar, nbits - 1, window + 1) & wmask; \\\n            wval = booth_encode(wval, wbits); \\\n            ptype##_gather_booth_wbits(&scratch[j], row, wbits, wval); \\\n        } \\\n        ptype##s_accumulate(ret, scratch, j); \\\n\\\n        for (j = 0; j < wbits; j++) \\\n            ptype##_double(ret, ret); \\\n\\\n        window = wbits; \\\n        wmask = ((limb_t)1 << (window + 1)) - 1; \\\n        nbits -= window; \\\n        i = 0; row = table; scalar_s = scalars; \\\n    } \\\n\\\n    for (j = i; i < npoints; i++, j++, row += nwin) { \\\n        if (j == scratch_sz) \\\n            ptype##s_accumulate(ret, scratch, j), j = 0; \\\n        scalar = *scalar_s ? 
*scalar_s++ : scalar+nbytes; \\\n        wval = (get_wval_limb(scalar, 0, window) << 1) & wmask; \\\n        wval = booth_encode(wval, wbits); \\\n        ptype##_gather_booth_wbits(&scratch[j], row, wbits, wval); \\\n    } \\\n    ptype##s_accumulate(ret, scratch, j); \\\n} \\\n\\\nsize_t prefix##s_mult_wbits_scratch_sizeof(size_t npoints) \\\n{ \\\n    const size_t scratch_sz = SCRATCH_SZ(ptype); \\\n    return sizeof(ptype) * (npoints < scratch_sz ? npoints : scratch_sz); \\\n} \\\nvoid prefix##s_mult_wbits(ptype *ret, const ptype##_affine table[], \\\n                          size_t wbits, size_t npoints, \\\n                          const byte *const scalars[], size_t nbits, \\\n                          ptype scratch[]) \\\n{ ptype##s_mult_wbits(ret, table, wbits, npoints, scalars, nbits, scratch); }\n\nPRECOMPUTE_WBITS_IMPL(blst_p1, POINTonE1, 384, fp, BLS12_381_Rx.p)\nPOINTS_MULT_WBITS_IMPL(blst_p1, POINTonE1, 384, fp, BLS12_381_Rx.p)\n\nPRECOMPUTE_WBITS_IMPL(blst_p2, POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\nPOINTS_MULT_WBITS_IMPL(blst_p2, POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\n\n/*\n * Pippenger algorithm implementation, fastest option for larger amount\n * of points...\n */\n\nstatic size_t pippenger_window_size(size_t npoints)\n{\n    size_t wbits;\n\n    for (wbits=0; npoints>>=1; wbits++) ;\n\n    if (wbits > 12)\n        return wbits - 3;\n    else if (wbits > 8)\n        return wbits - 2;\n    else if (wbits > 4)\n        return wbits - 1;\n\n    return wbits ? 2 : 1;\n}\n\n#define DECLARE_PRIVATE_POINTXYZZ(ptype, bits) \\\ntypedef struct { vec##bits X,Y,ZZZ,ZZ; } ptype##xyzz;\n\n#define POINTS_MULT_PIPPENGER_IMPL(prefix, ptype) \\\nstatic void ptype##_integrate_buckets(ptype *out, ptype##xyzz buckets[], \\\n                                                  size_t wbits) \\\n{ \\\n    ptype##xyzz ret[1], acc[1]; \\\n    size_t n = (size_t)1 << wbits; \\\n\\\n    /* Calculate sum of x[i-1]*i for i=1 through 1<<|wbits|. 
*/\\\n    vec_copy(acc, &buckets[--n], sizeof(acc)); \\\n    vec_copy(ret, &buckets[n], sizeof(ret)); \\\n    vec_zero(&buckets[n], sizeof(buckets[n])); \\\n    while (n--) { \\\n        ptype##xyzz_dadd(acc, acc, &buckets[n]); \\\n        ptype##xyzz_dadd(ret, ret, acc); \\\n        vec_zero(&buckets[n], sizeof(buckets[n])); \\\n    } \\\n    ptype##xyzz_to_Jacobian(out, ret); \\\n} \\\n\\\nstatic void ptype##_bucket(ptype##xyzz buckets[], limb_t booth_idx, \\\n                           size_t wbits, const ptype##_affine *p) \\\n{ \\\n    bool_t booth_sign = (booth_idx >> wbits) & 1; \\\n\\\n    booth_idx &= (1<<wbits) - 1; \\\n    if (booth_idx--) \\\n        ptype##xyzz_dadd_affine(&buckets[booth_idx], &buckets[booth_idx], \\\n                                                     p, booth_sign); \\\n} \\\n\\\nstatic void ptype##_prefetch(const ptype##xyzz buckets[], limb_t booth_idx, \\\n                             size_t wbits) \\\n{ \\\n    booth_idx &= (1<<wbits) - 1; \\\n    if (booth_idx--) \\\n        vec_prefetch(&buckets[booth_idx], sizeof(buckets[booth_idx])); \\\n} \\\n\\\nstatic void ptype##s_tile_pippenger(ptype *ret, \\\n                                    const ptype##_affine *const points[], \\\n                                    size_t npoints, \\\n                                    const byte *const scalars[], size_t nbits, \\\n                                    ptype##xyzz buckets[], \\\n                                    size_t bit0, size_t wbits, size_t cbits) \\\n{ \\\n    limb_t wmask, wval, wnxt; \\\n    size_t i, z, nbytes; \\\n    const byte *scalar = *scalars++; \\\n    const ptype##_affine *point = *points++; \\\n\\\n    nbytes = (nbits + 7)/8; /* convert |nbits| to bytes */ \\\n    wmask = ((limb_t)1 << (wbits+1)) - 1; \\\n    z = is_zero(bit0); \\\n    bit0 -= z^1; wbits += z^1; \\\n    wval = (get_wval_limb(scalar, bit0, wbits) << z) & wmask; \\\n    wval = booth_encode(wval, cbits); \\\n    scalar = *scalars ? 
*scalars++ : scalar+nbytes; \\\n    wnxt = (get_wval_limb(scalar, bit0, wbits) << z) & wmask; \\\n    wnxt = booth_encode(wnxt, cbits); \\\n    npoints--;  /* account for prefetch */ \\\n\\\n    ptype##_bucket(buckets, wval, cbits, point); \\\n    for (i = 1; i < npoints; i++) { \\\n        wval = wnxt; \\\n        scalar = *scalars ? *scalars++ : scalar+nbytes; \\\n        wnxt = (get_wval_limb(scalar, bit0, wbits) << z) & wmask; \\\n        wnxt = booth_encode(wnxt, cbits); \\\n        ptype##_prefetch(buckets, wnxt, cbits); \\\n        point = *points ? *points++ : point+1; \\\n        ptype##_bucket(buckets, wval, cbits, point); \\\n    } \\\n    point = *points ? *points++ : point+1; \\\n    ptype##_bucket(buckets, wnxt, cbits, point); \\\n    ptype##_integrate_buckets(ret, buckets, cbits - 1); \\\n} \\\n\\\nstatic void ptype##s_mult_pippenger(ptype *ret, \\\n                                    const ptype##_affine *const points[], \\\n                                    size_t npoints, \\\n                                    const byte *const scalars[], size_t nbits, \\\n                                    ptype##xyzz buckets[], size_t window) \\\n{ \\\n    size_t i, wbits, cbits, bit0 = nbits; \\\n    ptype tile[1]; \\\n\\\n    window = window ? 
window : pippenger_window_size(npoints); \\\n    vec_zero(buckets, sizeof(buckets[0]) << (window-1)); \\\n    vec_zero(ret, sizeof(*ret)); \\\n\\\n    /* top excess bits modulo target window size */ \\\n    wbits = nbits % window; /* yes, it may be zero */ \\\n    cbits = wbits + 1; \\\n    while (bit0 -= wbits) { \\\n        ptype##s_tile_pippenger(tile, points, npoints, scalars, nbits, \\\n                                      buckets, bit0, wbits, cbits); \\\n        ptype##_dadd(ret, ret, tile, NULL); \\\n        for (i = 0; i < window; i++) \\\n            ptype##_double(ret, ret); \\\n        cbits = wbits = window; \\\n    } \\\n    ptype##s_tile_pippenger(tile, points, npoints, scalars, nbits, \\\n                                  buckets, 0, wbits, cbits); \\\n    ptype##_dadd(ret, ret, tile, NULL); \\\n} \\\n\\\nsize_t prefix##s_mult_pippenger_scratch_sizeof(size_t npoints) \\\n{   return sizeof(ptype##xyzz) << (pippenger_window_size(npoints)-1);   } \\\nvoid prefix##s_tile_pippenger(ptype *ret, \\\n                              const ptype##_affine *const points[], \\\n                              size_t npoints, \\\n                              const byte *const scalars[], size_t nbits, \\\n                              ptype##xyzz scratch[], \\\n                              size_t bit0, size_t window) \\\n{ \\\n    size_t wbits, cbits; \\\n\\\n    if (bit0 + window > nbits)  wbits = nbits - bit0, cbits = wbits + 1; \\\n    else                        wbits = cbits = window; \\\n    ptype##s_tile_pippenger(ret, points, npoints, scalars, nbits, scratch, \\\n                                 bit0, wbits, cbits); \\\n} \\\nvoid prefix##s_mult_pippenger(ptype *ret, \\\n                              const ptype##_affine *const points[], \\\n                              size_t npoints, \\\n                              const byte *const scalars[], size_t nbits, \\\n                              ptype##xyzz scratch[]) \\\n{ \\\n    if (npoints == 1) { \\\n  
      prefix##_from_affine(ret, points[0]); \\\n        ptype##_mult_w5(ret, ret, scalars[0], nbits); \\\n        return; \\\n    } \\\n    if ((npoints * sizeof(ptype##_affine) * 8 * 3) <= SCRATCH_LIMIT && \\\n        npoints < 32) { \\\n        ptype##_affine *table = alloca(npoints * sizeof(ptype##_affine) * 8); \\\n        ptype##s_precompute_wbits(table, 4, points, npoints); \\\n        ptype##s_mult_wbits(ret, table, 4, npoints, scalars, nbits, NULL); \\\n        return; \\\n    } \\\n    ptype##s_mult_pippenger(ret, points, npoints, scalars, nbits, scratch, 0); \\\n}\n\nDECLARE_PRIVATE_POINTXYZZ(POINTonE1, 384)\nPOINTXYZZ_TO_JACOBIAN_IMPL(POINTonE1, 384, fp)\nPOINTXYZZ_DADD_IMPL(POINTonE1, 384, fp)\nPOINTXYZZ_DADD_AFFINE_IMPL(POINTonE1, 384, fp, BLS12_381_Rx.p)\nPOINTS_MULT_PIPPENGER_IMPL(blst_p1, POINTonE1)\n\nDECLARE_PRIVATE_POINTXYZZ(POINTonE2, 384x)\nPOINTXYZZ_TO_JACOBIAN_IMPL(POINTonE2, 384x, fp2)\nPOINTXYZZ_DADD_IMPL(POINTonE2, 384x, fp2)\nPOINTXYZZ_DADD_AFFINE_IMPL(POINTonE2, 384x, fp2, BLS12_381_Rx.p2)\nPOINTS_MULT_PIPPENGER_IMPL(blst_p2, POINTonE2)\n"
  },
  {
    "path": "src/no_asm.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#if LIMB_T_BITS==32\ntypedef unsigned long long llimb_t;\n#endif\n\n#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 || defined(__STDC_NO_VLA__)\n# error \"unsupported compiler\"\n#endif\n\n#if defined(__clang__)\n# pragma GCC diagnostic ignored \"-Wstatic-in-inline\"\n#endif\n\n#if !defined(__clang__) && !defined(__builtin_assume)\n# if defined(__GNUC__) && __GNUC__>=5\n#  define __builtin_assume(condition) if (!(condition)) __builtin_unreachable()\n# elif defined(_MSC_VER)\n#  define __builtin_assume(condition) __assume(condition)\n# else\n#  define __builtin_assume(condition) (void)(condition)\n# endif\n#endif\n\nstatic void mul_mont_n(limb_t ret[], const limb_t a[], const limb_t b[],\n                       const limb_t p[], limb_t n0, size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t mask, borrow, mx, hi, tmp[n+1], carry;\n    size_t i, j;\n\n    for (mx=b[0], hi=0, i=0; i<n; i++) {\n        limbx = (mx * (llimb_t)a[i]) + hi;\n        tmp[i] = (limb_t)limbx;\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n    mx = n0*tmp[0];\n    tmp[i] = hi;\n\n    for (carry=0, j=0; ; ) {\n        limbx = (mx * (llimb_t)p[0]) + tmp[0];\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n        for (i=1; i<n; i++) {\n            limbx = (mx * (llimb_t)p[i] + hi) + tmp[i];\n            tmp[i-1] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        limbx = tmp[i] + (hi + (llimb_t)carry);\n        tmp[i-1] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n\n        if (++j==n)\n            break;\n\n        for (mx=b[j], hi=0, i=0; i<n; i++) {\n            limbx = (mx * (llimb_t)a[i] + hi) + tmp[i];\n            tmp[i] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        mx = 
n0*tmp[0];\n        limbx = hi + (llimb_t)carry;\n        tmp[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = carry - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define MUL_MONT_IMPL(bits) \\\ninline void mul_mont_##bits(vec##bits ret, const vec##bits a, \\\n                            const vec##bits b, const vec##bits p, limb_t n0) \\\n{   mul_mont_n(ret, a, b, p, n0, NLIMBS(bits));   } \\\n\\\ninline void sqr_mont_##bits(vec##bits ret, const vec##bits a, \\\n                            const vec##bits p, limb_t n0) \\\n{   mul_mont_n(ret, a, a, p, n0, NLIMBS(bits));   }\n\n/*\n * 256-bit subroutines can handle arbitrary modulus, even non-\"sparse\",\n * but we have to harmonize the naming with assembly.\n */\n#define mul_mont_256 mul_mont_sparse_256\n#define sqr_mont_256 sqr_mont_sparse_256\nMUL_MONT_IMPL(256)\n#undef mul_mont_256\n#undef sqr_mont_256\nMUL_MONT_IMPL(384)\n\nstatic void add_mod_n(limb_t ret[], const limb_t a[], const limb_t b[],\n                      const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t mask, carry, borrow, tmp[n];\n    size_t i;\n\n    for (carry=0, i=0; i<n; i++) {\n        limbx = a[i] + (b[i] + (llimb_t)carry);\n        tmp[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = carry - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define ADD_MOD_IMPL(bits) \\\ninline void add_mod_##bits(vec##bits ret, const 
vec##bits a, \\\n                           const vec##bits b, const vec##bits p) \\\n{   add_mod_n(ret, a, b, p, NLIMBS(bits));   }\n\nADD_MOD_IMPL(256)\nADD_MOD_IMPL(384)\n\nstatic void sub_mod_n(limb_t ret[], const limb_t a[], const limb_t b[],\n                      const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t mask, carry, borrow;\n    size_t i;\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = a[i] - (b[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = 0 - borrow;\n    launder(mask);\n\n    for (carry=0, i=0; i<n; i++) {\n        limbx = ret[i] + ((p[i] & mask) + (llimb_t)carry);\n        ret[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n}\n\n#define SUB_MOD_IMPL(bits) \\\ninline void sub_mod_##bits(vec##bits ret, const vec##bits a, \\\n                           const vec##bits b, const vec##bits p) \\\n{   sub_mod_n(ret, a, b, p, NLIMBS(bits));   }\n\nSUB_MOD_IMPL(256)\nSUB_MOD_IMPL(384)\n\nstatic void mul_by_3_mod_n(limb_t ret[], const limb_t a[], const limb_t p[],\n                           size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t mask, carry, borrow, tmp[n], two_a[n];\n    size_t i;\n\n    for (carry=0, i=0; i<n; i++) {\n        limb_t a_i = a[i];\n        tmp[i] = a_i<<1 | carry;\n        carry = a_i>>(LIMB_T_BITS-1);\n    }\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        two_a[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = carry - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        two_a[i] = (two_a[i] & ~mask) | (tmp[i] & mask);\n\n    for (carry=0, i=0; i<n; i++) {\n        limbx = a[i] + (two_a[i] + (llimb_t)carry);\n        tmp[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    for (borrow=0, i=0; i<n; i++) 
{\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = carry - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define MUL_BY_3_MOD_IMPL(bits) \\\ninline void mul_by_3_mod_##bits(vec##bits ret, const vec##bits a, \\\n                                const vec##bits p) \\\n{   mul_by_3_mod_n(ret, a, p, NLIMBS(bits));   }\n\nMUL_BY_3_MOD_IMPL(256)\nMUL_BY_3_MOD_IMPL(384)\n\nstatic void lshift_mod_n(limb_t ret[], const limb_t a[], size_t count,\n                         const limb_t p[], size_t n)\n{\n    __builtin_assume(count != 0);\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t mask, carry, borrow, tmp[n];\n    size_t i;\n\n    while (count--) {\n        for (carry=0, i=0; i<n; i++) {\n            limb_t a_i = a[i];\n            tmp[i] = a_i<<1 | carry;\n            carry = a_i>>(LIMB_T_BITS-1);\n        }\n\n        for (borrow=0, i=0; i<n; i++) {\n            limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n            ret[i] = (limb_t)limbx;\n            borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n        }\n\n        mask = carry - borrow;\n        launder(mask);\n\n        for(i=0; i<n; i++)\n            ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n\n        a = ret;\n    }\n}\n\n#define LSHIFT_MOD_IMPL(bits) \\\ninline void lshift_mod_##bits(vec##bits ret, const vec##bits a, size_t count, \\\n                              const vec##bits p) \\\n{   lshift_mod_n(ret, a, count, p, NLIMBS(bits));   }\n\nLSHIFT_MOD_IMPL(256)\nLSHIFT_MOD_IMPL(384)\n\nstatic void cneg_mod_n(limb_t ret[], const limb_t a[], bool_t flag,\n                       const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t borrow, mask, tmp[n];\n    size_t i;\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = p[i] - (a[i] + (llimb_t)borrow);\n        tmp[i] = 
(limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    flag &= vec_is_zero(a, sizeof(tmp)) ^ 1;\n    mask = (limb_t)0 - flag;\n\n    for(i=0; i<n; i++)\n        ret[i] = (a[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define CNEG_MOD_IMPL(bits) \\\ninline void cneg_mod_##bits(vec##bits ret, const vec##bits a, bool_t flag, \\\n                            const vec##bits p) \\\n{   cneg_mod_n(ret, a, flag, p, NLIMBS(bits));   }\n\nCNEG_MOD_IMPL(256)\nCNEG_MOD_IMPL(384)\n\nstatic limb_t check_mod_n(const byte a[], const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t borrow, ai, acc;\n    size_t i, j;\n\n    for (acc=borrow=0, i=0; i<n; i++) {\n        for (ai=0, j=0; j<8*sizeof(limb_t); j+=8)\n            ai |= (limb_t)(*a++) << j;\n        acc |= ai;\n        limbx = ai - (p[i] + (llimb_t)borrow);\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    return borrow & (is_zero(acc) ^ 1);\n}\n\n#define CHECK_MOD_IMPL(bits) \\\ninline limb_t check_mod_##bits(const pow##bits a, const vec##bits p) \\\n{   return check_mod_n(a, p, NLIMBS(bits));   }\n\nCHECK_MOD_IMPL(256)\n\nstatic limb_t add_n_check_mod_n(byte ret[], const byte a[], const byte b[],\n                                            const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    limb_t ret_[n], a_[n], b_[n], zero;\n\n    limbs_from_le_bytes(a_, a, sizeof(a_));\n    limbs_from_le_bytes(b_, b, sizeof(b_));\n\n    add_mod_n(ret_, a_, b_, p, n);\n    zero = vec_is_zero(ret_, sizeof(ret_));\n\n    le_bytes_from_limbs(ret, ret_, sizeof(ret_));\n\n    return zero^1;\n}\n\n#define ADD_N_CHECK_MOD_IMPL(bits) \\\ninline limb_t add_n_check_mod_##bits(pow##bits ret, const pow##bits a, \\\n                                     const pow##bits b, const vec##bits p) \\\n{   return add_n_check_mod_n(ret, a, b, p, NLIMBS(bits));   }\n\nADD_N_CHECK_MOD_IMPL(256)\n\nstatic limb_t sub_n_check_mod_n(byte ret[], const byte a[], const byte 
b[],\n                                            const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    limb_t ret_[n], a_[n], b_[n], zero;\n\n    limbs_from_le_bytes(a_, a, sizeof(a_));\n    limbs_from_le_bytes(b_, b, sizeof(b_));\n\n    sub_mod_n(ret_, a_, b_, p, n);\n    zero = vec_is_zero(ret_, sizeof(ret_));\n\n    le_bytes_from_limbs(ret, ret_, sizeof(ret_));\n\n    return zero^1;\n}\n\n#define SUB_N_CHECK_MOD_IMPL(bits) \\\ninline limb_t sub_n_check_mod_##bits(pow##bits ret, const pow##bits a, \\\n                                     const pow##bits b, const vec##bits p) \\\n{   return sub_n_check_mod_n(ret, a, b, p, NLIMBS(bits));   }\n\nSUB_N_CHECK_MOD_IMPL(256)\n\nstatic void from_mont_n(limb_t ret[], const limb_t a[],\n                        const limb_t p[], limb_t n0, size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t mask, borrow, mx, hi, tmp[n];\n    size_t i, j;\n\n    for (j=0; j<n; j++) {\n        mx = n0*a[0];\n        limbx = (mx * (llimb_t)p[0]) + a[0];\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n        for (i=1; i<n; i++) {\n            limbx = (mx * (llimb_t)p[i] + hi) + a[i];\n            tmp[i-1] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        tmp[i-1] = hi;\n        a = tmp;\n    }\n\n    /* this is needed only if input can be non-fully-reduced */\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = 0 - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define FROM_MONT_IMPL(bits) \\\ninline void from_mont_##bits(vec##bits ret, const vec##bits a, \\\n                             const vec##bits p, limb_t n0) \\\n{   from_mont_n(ret, a, p, n0, NLIMBS(bits));   }\n\nFROM_MONT_IMPL(256)\nFROM_MONT_IMPL(384)\n\nstatic void redc_mont_n(limb_t 
ret[], const limb_t a[],\n                        const limb_t p[], limb_t n0, size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t mask, carry, borrow, mx, hi, tmp[n];\n    const limb_t *b = a;\n    size_t i, j;\n\n    for (j=0; j<n; j++) {\n        mx = n0*b[0];\n        limbx = (mx * (llimb_t)p[0]) + b[0];\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n        for (i=1; i<n; i++) {\n            limbx = (mx * (llimb_t)p[i] + hi) + b[i];\n            tmp[i-1] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        tmp[i-1] = hi;\n        b = tmp;\n    }\n\n    for (carry=0, i=0; i<n; i++) {\n        limbx = a[n+i] + (tmp[i] + (llimb_t)carry);\n        tmp[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        ret[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = carry - borrow;\n    launder(mask);\n\n    for(i=0; i<n; i++)\n        ret[i] = (ret[i] & ~mask) | (tmp[i] & mask);\n}\n\n#define REDC_MONT_IMPL(bits, bits2) \\\ninline void redc_mont_##bits(vec##bits ret, const vec##bits2 a, \\\n                             const vec##bits p, limb_t n0) \\\n{   redc_mont_n(ret, a, p, n0, NLIMBS(bits));   }\n\nREDC_MONT_IMPL(256, 512)\nREDC_MONT_IMPL(384, 768)\n\nstatic void rshift_mod_n(limb_t ret[], const limb_t a[], size_t count,\n                         const limb_t p[], size_t n)\n{\n    __builtin_assume(count != 0);\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t mask, carry, limb, next;\n    size_t i;\n\n    while (count--) {\n        mask = 0 - (a[0] & 1);\n        launder(mask);\n        for (carry=0, i=0; i<n; i++) {\n            limbx = a[i] + ((p[i]&mask) + (llimb_t)carry);\n            ret[i] = (limb_t)limbx;\n            carry = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n\n        for (next=ret[0], i=0; 
i<n-1; i++) {\n            limb = next >> 1;\n            next = ret[i+1];\n            ret[i] = limb | next << (LIMB_T_BITS-1);\n        }\n        ret[i] = next >> 1 | carry << (LIMB_T_BITS-1);\n\n        a = ret;\n    }\n}\n\n#define RSHIFT_MOD_IMPL(bits) \\\ninline void rshift_mod_##bits(vec##bits ret, const vec##bits a, size_t count, \\\n                              const vec##bits p) \\\n{   rshift_mod_n(ret, a, count, p, NLIMBS(bits));   }\n\nRSHIFT_MOD_IMPL(256)\nRSHIFT_MOD_IMPL(384)\n\n#define DIV_BY_2_MOD_IMPL(bits) \\\ninline void div_by_2_mod_##bits(vec##bits ret, const vec##bits a, \\\n                                const vec##bits p) \\\n{   rshift_mod_n(ret, a, 1, p, NLIMBS(bits));   }\n\nDIV_BY_2_MOD_IMPL(384)\n\nstatic limb_t sgn0_pty_mod_n(const limb_t a[], const limb_t p[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t carry, borrow, ret, tmp[n];\n    size_t i;\n\n    ret = a[0] & 1; /* parity */\n\n    for (carry=0, i=0; i<n; i++) {\n        limb_t a_i = a[i];\n        tmp[i] = a_i<<1 | carry;\n        carry = a_i>>(LIMB_T_BITS-1);\n    }\n\n    for (borrow=0, i=0; i<n; i++) {\n        limbx = tmp[i] - (p[i] + (llimb_t)borrow);\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    ret |= ((carry - borrow) & 2) ^ 2;\n\n    return ret;\n}\n\ninline limb_t sgn0_pty_mod_384(const vec384 a, const vec384 p)\n{   return sgn0_pty_mod_n(a, p, NLIMBS(384));   }\n\ninline limb_t sgn0_pty_mont_384(const vec384 a, const vec384 p, limb_t n0)\n{\n    vec384 tmp;\n\n    from_mont_n(tmp, a, p, n0, NLIMBS(384));\n\n    return sgn0_pty_mod_n(tmp, p, NLIMBS(384));\n}\n\ninline limb_t sgn0_pty_mod_384x(const vec384x a, const vec384 p)\n{\n    limb_t re, im, sign, prty;\n\n    re = sgn0_pty_mod_n(a[0], p, NLIMBS(384));\n    im = sgn0_pty_mod_n(a[1], p, NLIMBS(384));\n\n    /* a->im!=0 ? 
sgn0(a->im) : sgn0(a->re) */\n    sign = (limb_t)0 - vec_is_zero(a[1], sizeof(vec384));\n    sign = (re & sign) | (im & ~sign);\n\n    /* a->re==0 ? prty(a->im) : prty(a->re) */\n    prty = (limb_t)0 - vec_is_zero(a[0], sizeof(vec384));\n    prty = (im & prty) | (re & ~prty);\n\n    return (sign & 2) | (prty & 1);\n}\n\ninline limb_t sgn0_pty_mont_384x(const vec384x a, const vec384 p, limb_t n0)\n{\n    vec384x tmp;\n\n    from_mont_n(tmp[0], a[0], p, n0, NLIMBS(384));\n    from_mont_n(tmp[1], a[1], p, n0, NLIMBS(384));\n\n    return sgn0_pty_mod_384x(tmp, p);\n}\n\nvoid mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,\n                          const vec384 p, limb_t n0)\n{\n    vec384 aa, bb, cc;\n\n    add_mod_n(aa, a[0], a[1], p, NLIMBS(384));\n    add_mod_n(bb, b[0], b[1], p, NLIMBS(384));\n    mul_mont_n(bb, bb, aa, p, n0, NLIMBS(384));\n    mul_mont_n(aa, a[0], b[0], p, n0, NLIMBS(384));\n    mul_mont_n(cc, a[1], b[1], p, n0, NLIMBS(384));\n    sub_mod_n(ret[0], aa, cc, p, NLIMBS(384));\n    sub_mod_n(ret[1], bb, aa, p, NLIMBS(384));\n    sub_mod_n(ret[1], ret[1], cc, p, NLIMBS(384));\n}\n\n/*\n * mul_mont_n without final conditional subtraction, which implies\n * that modulus is one bit short, which in turn means that there are\n * no carries to handle between iterations...\n */\nstatic void mul_mont_nonred_n(limb_t ret[], const limb_t a[], const limb_t b[],\n                              const limb_t p[], limb_t n0, size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t mx, hi, tmp[n+1];\n    size_t i, j;\n\n    for (mx=b[0], hi=0, i=0; i<n; i++) {\n        limbx = (mx * (llimb_t)a[i]) + hi;\n        tmp[i] = (limb_t)limbx;\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n    mx = n0*tmp[0];\n    tmp[i] = hi;\n\n    for (j=0; ; ) {\n        limbx = (mx * (llimb_t)p[0]) + tmp[0];\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n        for (i=1; i<n; i++) {\n            limbx = (mx * (llimb_t)p[i] + hi) + 
tmp[i];\n            tmp[i-1] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        tmp[i-1] = tmp[i] + hi;\n\n        if (++j==n)\n            break;\n\n        for (mx=b[j], hi=0, i=0; i<n; i++) {\n            limbx = (mx * (llimb_t)a[i] + hi) + tmp[i];\n            tmp[i] = (limb_t)limbx;\n            hi = (limb_t)(limbx >> LIMB_T_BITS);\n        }\n        mx = n0*tmp[0];\n        tmp[i] = hi;\n    }\n\n    vec_copy(ret, tmp, sizeof(tmp)-sizeof(limb_t));\n}\n\nvoid sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count,\n                        const vec384 p, limb_t n0, const vec384 b)\n{\n    __builtin_assume(count != 0);\n    while(count--) {\n        mul_mont_nonred_n(ret, a, a, p, n0, NLIMBS(384));\n        a = ret;\n    }\n    mul_mont_n(ret, ret, b, p, n0, NLIMBS(384));\n}\n\nvoid sqr_mont_382x(vec384x ret, const vec384x a,\n                          const vec384 p, limb_t n0)\n{\n    llimb_t limbx;\n    limb_t mask, carry, borrow;\n    size_t i;\n    vec384 t0, t1;\n\n    /* \"add_mod_n(t0, a[0], a[1], p, NLIMBS(384));\" */\n    for (carry=0, i=0; i<NLIMBS(384); i++) {\n        limbx = a[0][i] + (a[1][i] + (llimb_t)carry);\n        t0[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    /* \"sub_mod_n(t1, a[0], a[1], p, NLIMBS(384));\" */\n    for (borrow=0, i=0; i<NLIMBS(384); i++) {\n        limbx = a[0][i] - (a[1][i] + (llimb_t)borrow);\n        t1[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n    mask = 0 - borrow;\n    launder(mask);\n\n    /* \"mul_mont_n(ret[1], a[0], a[1], p, n0, NLIMBS(384));\" */\n    mul_mont_nonred_n(ret[1], a[0], a[1], p, n0, NLIMBS(384));\n\n    /* \"add_mod_n(ret[1], ret[1], ret[1], p, NLIMBS(384));\" */\n    for (carry=0, i=0; i<NLIMBS(384); i++) {\n        limb_t a_i = ret[1][i];\n        ret[1][i] = a_i<<1 | carry;\n        carry = a_i>>(LIMB_T_BITS-1);\n    }\n\n    /* \"mul_mont_n(ret[0], t0, t1, p, n0, 
NLIMBS(384));\" */\n    mul_mont_nonred_n(ret[0], t0, t1, p, n0, NLIMBS(384));\n\n    /* account for t1's sign... */\n    for (borrow=0, i=0; i<NLIMBS(384); i++) {\n        limbx = ret[0][i] - ((t0[i] & mask) + (llimb_t)borrow);\n        ret[0][i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n    mask = 0 - borrow;\n    launder(mask);\n    for (carry=0, i=0; i<NLIMBS(384); i++) {\n        limbx = ret[0][i] + ((p[i] & mask) + (llimb_t)carry);\n        ret[0][i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n}\n\n#if defined(__GNUC__) || defined(__clang__)\n# define MSB(x) ({ limb_t ret = (x) >> (LIMB_T_BITS-1); launder(ret); ret; })\n#else\n# define MSB(x) ((x) >> (LIMB_T_BITS-1))\n#endif\n\nstatic size_t num_bits(limb_t l)\n{\n    limb_t x, mask;\n    size_t bits = is_zero(l) ^ 1;\n\n    if (sizeof(limb_t) == 8) {\n        x = l >> (32 & (8*sizeof(limb_t)-1));\n        mask = 0 - MSB(0 - x);\n        bits += 32 & mask;\n        l ^= (x ^ l) & mask;\n    }\n\n    x = l >> 16;\n    mask = 0 - MSB(0 - x);\n    bits += 16 & mask;\n    l ^= (x ^ l) & mask;\n\n    x = l >> 8;\n    mask = 0 - MSB(0 - x);\n    bits += 8 & mask;\n    l ^= (x ^ l) & mask;\n\n    x = l >> 4;\n    mask = 0 - MSB(0 - x);\n    bits += 4 & mask;\n    l ^= (x ^ l) & mask;\n\n    x = l >> 2;\n    mask = 0 - MSB(0 - x);\n    bits += 2 & mask;\n    l ^= (x ^ l) & mask;\n\n    bits += l >> 1;\n\n    return bits;\n}\n\n#if defined(__clang_major__) && __clang_major__>7\n__attribute__((optnone))\n#endif\nstatic limb_t lshift_2(limb_t hi, limb_t lo, size_t l)\n{\n    size_t r = LIMB_T_BITS - l;\n    limb_t mask = 0 - (is_zero(l)^1);\n    return (hi << (l&(LIMB_T_BITS-1))) | ((lo & mask) >> (r&(LIMB_T_BITS-1)));\n}\n\n/*\n * https://eprint.iacr.org/2020/972 with 'k' being LIMB_T_BITS-1.\n */\nstatic void ab_approximation_n(limb_t a_[2], const limb_t a[],\n                               limb_t b_[2], const limb_t b[], size_t n)\n{\n    
__builtin_assume(n != 0 && n%2 == 0);\n    limb_t a_hi, a_lo, b_hi, b_lo, mask;\n    size_t i;\n\n    i = n-1;\n    a_hi = a[i],    a_lo = a[i-1];\n    b_hi = b[i],    b_lo = b[i-1];\n    for (i--; --i;) {\n        mask = 0 - is_zero(a_hi | b_hi);\n        a_hi = ((a_lo ^ a_hi) & mask) ^ a_hi;\n        b_hi = ((b_lo ^ b_hi) & mask) ^ b_hi;\n        a_lo = ((a[i] ^ a_lo) & mask) ^ a_lo;\n        b_lo = ((b[i] ^ b_lo) & mask) ^ b_lo;\n    }\n    i = LIMB_T_BITS - num_bits(a_hi | b_hi);\n    /* |i| can be LIMB_T_BITS if all a[2..]|b[2..] were zeros */\n\n    a_[0] = a[0], a_[1] = lshift_2(a_hi, a_lo, i);\n    b_[0] = b[0], b_[1] = lshift_2(b_hi, b_lo, i);\n}\n\ntypedef struct { limb_t f0, g0, f1, g1; } factors;\n\nstatic void inner_loop_n(factors *fg, const limb_t a_[2], const limb_t b_[2],\n                         size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t f0 = 1, g0 = 0, f1 = 0, g1 = 1;\n    limb_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm;\n\n    a_lo = a_[0], a_hi = a_[1];\n    b_lo = b_[0], b_hi = b_[1];\n\n    while(n--) {\n        odd = 0 - (a_lo&1);\n\n        /* a_ -= b_ if a_ is odd */\n        t_lo = a_lo, t_hi = a_hi;\n        limbx = a_lo - (llimb_t)(b_lo & odd);\n        a_lo = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n        limbx = a_hi - ((llimb_t)(b_hi & odd) + borrow);\n        a_hi = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS);\n\n        /* negate a_-b_ if it borrowed */\n        a_lo ^= borrow;\n        a_hi ^= borrow;\n        limbx = a_lo + (llimb_t)(borrow & 1);\n        a_lo = (limb_t)limbx;\n        a_hi += (limb_t)(limbx >> LIMB_T_BITS) & 1;\n\n        /* b_=a_ if a_-b_ borrowed */\n        b_lo = ((t_lo ^ b_lo) & borrow) ^ b_lo;\n        b_hi = ((t_hi ^ b_hi) & borrow) ^ b_hi;\n\n        /* exchange f0 and f1 if a_-b_ borrowed */\n        xorm = (f0 ^ f1) & borrow;\n        f0 ^= xorm;\n        f1 ^= xorm;\n\n        /* exchange g0 and g1 if 
a_-b_ borrowed */\n        xorm = (g0 ^ g1) & borrow;\n        g0 ^= xorm;\n        g1 ^= xorm;\n\n        /* subtract if a_ was odd */\n        f0 -= f1 & odd;\n        g0 -= g1 & odd;\n\n        f1 <<= 1;\n        g1 <<= 1;\n        a_lo >>= 1; a_lo |= a_hi << (LIMB_T_BITS-1);\n        a_hi >>= 1;\n    }\n\n    fg->f0 = f0, fg->g0 = g0, fg->f1 = f1, fg->g1= g1;\n}\n\nstatic limb_t cneg_n(limb_t ret[], const limb_t a[], limb_t neg, size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx = 0;\n    limb_t carry;\n    size_t i;\n\n    for (carry=neg&1, i=0; i<n; i++) {\n        limbx = (llimb_t)(a[i] ^ neg) + carry;\n        ret[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    return 0 - MSB((limb_t)limbx);\n}\n\nstatic limb_t add_n(limb_t ret[], const limb_t a[], limb_t b[], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t carry;\n    size_t i;\n\n    for (carry=0, i=0; i<n; i++) {\n        limbx = a[i] + (b[i] + (llimb_t)carry);\n        ret[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    return carry;\n}\n\nstatic limb_t umul_n(limb_t ret[], const limb_t a[], limb_t b, size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t hi;\n    size_t i;\n\n    for (hi=0, i=0; i<n; i++) {\n        limbx = (b * (llimb_t)a[i]) + hi;\n        ret[i] = (limb_t)limbx;\n        hi = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n\n    return hi;\n}\n\nstatic limb_t smul_n_shift_n(limb_t ret[], const limb_t a[], limb_t *f_,\n                                           const limb_t b[], limb_t *g_,\n                                           size_t n)\n{\n    __builtin_assume(n != 0);\n    limb_t a_[n+1], b_[n+1], f, g, neg, carry, hi;\n    size_t i;\n\n    /* |a|*|f_| */\n    f = *f_;\n    neg = 0 - MSB(f);\n    f = (f ^ neg) - neg;            /* ensure |f| is positive */\n    (void)cneg_n(a_, a, neg, n);\n    hi = umul_n(a_, a_, f, n);\n    a_[n] = hi - (f & neg);\n\n  
  /* |b|*|g_| */\n    g = *g_;\n    neg = 0 - MSB(g);\n    g = (g ^ neg) - neg;            /* ensure |g| is positive */\n    (void)cneg_n(b_, b, neg, n);\n    hi = umul_n(b_, b_, g, n);\n    b_[n] = hi - (g & neg);\n\n    /* |a|*|f_| + |b|*|g_| */\n    (void)add_n(a_, a_, b_, n+1);\n\n    /* (|a|*|f_| + |b|*|g_|) >> k */\n    for (carry=a_[0], i=0; i<n; i++) {\n        hi = carry >> (LIMB_T_BITS-2);\n        carry = a_[i+1];\n        ret[i] = hi | (carry << 2);\n    }\n\n    /* ensure result is non-negative, fix up |f_| and |g_| accordingly */\n    neg = 0 - MSB(carry);\n    *f_ = (*f_ ^ neg) - neg;\n    *g_ = (*g_ ^ neg) - neg;\n    (void)cneg_n(ret, ret, neg, n);\n\n    return neg;\n}\n\nstatic limb_t smul_2n(limb_t ret[], const limb_t u[], limb_t f,\n                                    const limb_t v[], limb_t g, size_t n)\n{\n    __builtin_assume(n != 0);\n    limb_t u_[n], v_[n], neg, hi;\n\n    /* |u|*|f_| */\n    neg = 0 - MSB(f);\n    f = (f ^ neg) - neg;            /* ensure |f| is positive */\n    neg = cneg_n(u_, u, neg, n);\n    hi = umul_n(u_, u_, f, n) - (f&neg);\n\n    /* |v|*|g_| */\n    neg = 0 - MSB(g);\n    g = (g ^ neg) - neg;            /* ensure |g| is positive */\n    neg = cneg_n(v_, v, neg, n);\n    hi += umul_n(v_, v_, g, n) - (g&neg);\n\n    /* |u|*|f_| + |v|*|g_| */\n    hi += add_n(ret, u_, v_, n);\n\n    return hi;\n}\n\nstatic void ct_inverse_mod_n(limb_t ret[], const limb_t inp[],\n                             const limb_t mod[], const limb_t modx[], size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t a[n], b[n], u[2*n], v[2*n], t[2*n];\n    limb_t a_[2], b_[2], sign, carry, top;\n    factors fg;\n    size_t i;\n\n    vec_copy(a, inp, sizeof(a));\n    vec_copy(b, mod, sizeof(b));\n    vec_zero(u, sizeof(u)); u[0] = 1;\n    vec_zero(v, sizeof(v));\n\n    for (i=0; i<(2*n*LIMB_T_BITS)/(LIMB_T_BITS-2); i++) {\n        ab_approximation_n(a_, a, b_, b, n);\n        inner_loop_n(&fg, a_, b_, 
LIMB_T_BITS-2);\n        (void)smul_n_shift_n(t, a, &fg.f0, b, &fg.g0, n);\n        (void)smul_n_shift_n(b, a, &fg.f1, b, &fg.g1, n);\n        vec_copy(a, t, sizeof(a));\n        smul_2n(t, u, fg.f0, v, fg.g0, 2*n);\n        smul_2n(v, u, fg.f1, v, fg.g1, 2*n);\n        vec_copy(u, t, sizeof(u));\n    }\n\n    inner_loop_n(&fg, a, b, (2*n*LIMB_T_BITS)%(LIMB_T_BITS-2));\n    top = smul_2n(ret, u, fg.f1, v, fg.g1, 2*n);\n\n    sign = 0 - MSB(top);    /* top is 1, 0 or -1 */\n    for (carry=0, i=0; i<n; i++) {\n        limbx = ret[n+i] + ((modx[i] & sign) + (llimb_t)carry);\n        ret[n+i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n    top += carry;\n    sign = 0 - top;         /* top is 1, 0 or -1 */\n    top |= sign;\n    for (i=0; i<n; i++)\n        a[i] = modx[i] & top;\n    (void)cneg_n(a, a, 0 - MSB(sign), n);\n    add_n(ret+n, ret+n, a, n);\n}\n\n#define CT_INVERSE_MOD_IMPL(bits, bits2) \\\ninline void ct_inverse_mod_##bits(vec##bits2 ret, const vec##bits inp, \\\n                                  const vec##bits mod, const vec##bits modx) \\\n{   ct_inverse_mod_n(ret, inp, mod, modx, NLIMBS(bits));   }\n\nCT_INVERSE_MOD_IMPL(256, 512)\nCT_INVERSE_MOD_IMPL(384, 768)\n\n/*\n * Copy of inner_loop_n above, but with |L| updates.\n */\nstatic limb_t legendre_loop_n(limb_t L, factors *fg, const limb_t a_[2],\n                              const limb_t b_[2], size_t n)\n{\n    __builtin_assume(n != 0);\n    llimb_t limbx;\n    limb_t f0 = 1, g0 = 0, f1 = 0, g1 = 1;\n    limb_t a_lo, a_hi, b_lo, b_hi, t_lo, t_hi, odd, borrow, xorm;\n\n    a_lo = a_[0], a_hi = a_[1];\n    b_lo = b_[0], b_hi = b_[1];\n\n    while(n--) {\n        odd = 0 - (a_lo&1);\n\n        /* a_ -= b_ if a_ is odd */\n        t_lo = a_lo, t_hi = a_hi;\n        limbx = a_lo - (llimb_t)(b_lo & odd);\n        a_lo = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n        limbx = a_hi - ((llimb_t)(b_hi & odd) + borrow);\n        a_hi = 
(limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS);\n\n        L += ((t_lo & b_lo) >> 1) & borrow;\n\n        /* negate a_-b_ if it borrowed */\n        a_lo ^= borrow;\n        a_hi ^= borrow;\n        limbx = a_lo + (llimb_t)(borrow & 1);\n        a_lo = (limb_t)limbx;\n        a_hi += (limb_t)(limbx >> LIMB_T_BITS) & 1;\n\n        /* b_=a_ if a_-b_ borrowed */\n        b_lo = ((t_lo ^ b_lo) & borrow) ^ b_lo;\n        b_hi = ((t_hi ^ b_hi) & borrow) ^ b_hi;\n\n        /* exchange f0 and f1 if a_-b_ borrowed */\n        xorm = (f0 ^ f1) & borrow;\n        f0 ^= xorm;\n        f1 ^= xorm;\n\n        /* exchange g0 and g1 if a_-b_ borrowed */\n        xorm = (g0 ^ g1) & borrow;\n        g0 ^= xorm;\n        g1 ^= xorm;\n\n        /* subtract if a_ was odd */\n        f0 -= f1 & odd;\n        g0 -= g1 & odd;\n\n        f1 <<= 1;\n        g1 <<= 1;\n        a_lo >>= 1; a_lo |= a_hi << (LIMB_T_BITS-1);\n        a_hi >>= 1;\n\n        L += (b_lo + 2) >> 2;\n    }\n\n    fg->f0 = f0, fg->g0 = g0, fg->f1 = f1, fg->g1 = g1;\n\n    return L;\n}\n\nstatic bool_t ct_is_sqr_mod_n(const limb_t inp[], const limb_t mod[], size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    limb_t a[n], b[n], t[n];\n    limb_t a_[2], b_[2], neg, L = 0;\n    factors fg;\n    size_t i;\n\n    vec_copy(a, inp, sizeof(a));\n    vec_copy(b, mod, sizeof(b));\n\n    for (i=0; i<(2*n*LIMB_T_BITS)/(LIMB_T_BITS-2); i++) {\n        ab_approximation_n(a_, a, b_, b, n);\n        L = legendre_loop_n(L, &fg, a_, b_, LIMB_T_BITS-2);\n        neg = smul_n_shift_n(t, a, &fg.f0, b, &fg.g0, n);\n        (void)smul_n_shift_n(b, a, &fg.f1, b, &fg.g1, n);\n        vec_copy(a, t, sizeof(a));\n        L += (b[0] >> 1) & neg;\n    }\n\n    L = legendre_loop_n(L, &fg, a, b, (2*n*LIMB_T_BITS)%(LIMB_T_BITS-2));\n\n    return (L & 1) ^ 1;\n}\n\n#define CT_IS_SQR_MOD_IMPL(bits) \\\ninline bool_t ct_is_square_mod_##bits(const vec##bits inp, \\\n                                      const vec##bits mod) 
\\\n{   return ct_is_sqr_mod_n(inp, mod, NLIMBS(bits));   }\n\nCT_IS_SQR_MOD_IMPL(384)\n\n/*\n * |div_top| points at two most significant limbs of the dividend, |d_hi|\n * and |d_lo| are two most significant limbs of the divisor. If divisor\n * is only one limb, it is to be passed in |d_hi| with zero in |d_lo|.\n * The divisor is required to be \"bitwise left-aligned,\" and dividend's\n * top limbs to be not larger than the divisor's. The latter limitation\n * can be problematic in the first iteration of multi-precision division,\n * where in most general case the condition would have to be \"smaller.\"\n * The subroutine considers four limbs, two of which are \"overlapping,\"\n * hence the name... Another way to look at it is to think of the pair\n * of the dividend's limbs being suffixed with a zero:\n *   +-------+-------+-------+\n * R |       |       |   0   |\n *   +-------+-------+-------+\n *           +-------+-------+\n * D         |       |       |\n *           +-------+-------+\n */\nlimb_t div_3_limbs(const limb_t div_top[2], limb_t d_lo, limb_t d_hi)\n{\n    llimb_t Rx;\n    limb_t r_lo = div_top[0], r_hi = div_top[1];\n    limb_t Q = 0, mask, borrow, rx;\n    size_t i;\n\n    for (i = 0; i < LIMB_T_BITS; i++) {\n        /* \"borrow, Rx = R - D\" */\n        Rx = (llimb_t)r_lo - d_lo;\n        rx = (limb_t)Rx;\n        borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1;\n        Rx = r_hi - (d_hi + (llimb_t)borrow);\n        borrow = (limb_t)(Rx >> LIMB_T_BITS);\n\n        /* \"if (R >= D) R -= D\" */\n        r_lo = ((r_lo ^ rx) & borrow) ^ rx;\n        rx = (limb_t)Rx;\n        r_hi = ((r_hi ^ rx) & borrow) ^ rx;\n\n        Q <<= 1;\n        Q |= ~borrow & 1;\n\n        /* \"D >>= 1\" */\n        d_lo >>= 1; d_lo |= d_hi << (LIMB_T_BITS - 1);\n        d_hi >>= 1;\n    }\n\n    mask = 0 - MSB(Q);  /* does it overflow? 
*/\n\n    /* \"borrow, Rx = R - D\" */\n    Rx = (llimb_t)r_lo - d_lo;\n    rx = (limb_t)Rx;\n    borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1;\n    Rx = r_hi - (d_hi + (llimb_t)borrow);\n    borrow = (limb_t)(Rx >> LIMB_T_BITS) & 1;\n\n    Q <<= 1;\n    Q |= borrow ^ 1;\n\n    return (Q | mask);\n}\n\nstatic limb_t quot_rem_n(limb_t *div_rem, const limb_t *divisor,\n                                          limb_t quotient, size_t n)\n{\n    __builtin_assume(n != 0 && n%2 == 0);\n    llimb_t limbx;\n    limb_t tmp[n+1], carry, mask, borrow;\n    size_t i;\n\n    /* divisor*quotient */\n    for (carry=0, i=0; i<n; i++) {\n        limbx = (quotient * (llimb_t)divisor[i]) + carry;\n        tmp[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS);\n    }\n    tmp[i] = carry;\n\n    /* remainder = dividend - divisor*quotient */\n    for (borrow=0, i=0; i<=n; i++) {\n        limbx = div_rem[i] - (tmp[i] + (llimb_t)borrow);\n        tmp[i] = (limb_t)limbx;\n        borrow = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    mask = 0 - borrow;\n    launder(mask);\n\n    /* if quotient was off by one, add divisor to the remainder */\n    for (carry=0, i=0; i<n; i++) {\n        limbx = tmp[i] + ((divisor[i] & mask) + (llimb_t)carry);\n        div_rem[i] = (limb_t)limbx;\n        carry = (limb_t)(limbx >> LIMB_T_BITS) & 1;\n    }\n\n    return (div_rem[i] = quotient + mask);\n}\n\ninline limb_t quot_rem_128(limb_t *div_rem, const limb_t *divisor,\n                                            limb_t quotient)\n{   return quot_rem_n(div_rem, divisor, quotient, NLIMBS(128));   }\n\ninline limb_t quot_rem_64(limb_t *div_rem, const limb_t *divisor,\n                                           limb_t quotient)\n{   return quot_rem_n(div_rem, divisor, quotient, NLIMBS(64));   }\n\n/*\n * Unlock reference implementations in vect.c\n */\n#define mul_by_8_mod_384 mul_by_8_mod_384\n#define mul_by_8_mod_384x mul_by_8_mod_384x\n#define mul_by_3_mod_384x mul_by_3_mod_384x\n#define 
mul_by_1_plus_i_mod_384x mul_by_1_plus_i_mod_384x\n#define add_mod_384x add_mod_384x\n#define sub_mod_384x sub_mod_384x\n#define lshift_mod_384x lshift_mod_384x\n#define sqr_mont_384x sqr_mont_384x\n\ninline void vec_prefetch(const void *ptr, size_t len)\n{   (void)ptr; (void)len;   }\n\n/*\n * SHA-256\n */\n#define ROTR(x,n)\t((x)>>n | (x)<<(32-n))\n#define Sigma0(x)\t(ROTR((x),2) ^ ROTR((x),13) ^ ROTR((x),22))\n#define Sigma1(x)\t(ROTR((x),6) ^ ROTR((x),11) ^ ROTR((x),25))\n#define sigma0(x)\t(ROTR((x),7) ^ ROTR((x),18) ^ ((x)>>3))\n#define sigma1(x)\t(ROTR((x),17) ^ ROTR((x),19) ^ ((x)>>10))\n#define Ch(x,y,z)\t(((x) & (y)) ^ ((~(x)) & (z)))\n#define Maj(x,y,z)\t(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))\n\nvoid blst_sha256_block_data_order(unsigned int *v, const void *inp,\n                                                   size_t blocks)\n{\n    static const unsigned int K256[64] = {\n        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,\n        0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,\n        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,\n        0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,\n        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,\n        0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,\n        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,\n        0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,\n        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,\n        0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,\n        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,\n        0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,\n        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,\n        0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,\n        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,\n        0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2\n    };\n    unsigned int X[16], l, a, b, c, d, e, f, g, h, s0, s1, T1, T2;\n    const unsigned char *data = inp;\n    size_t round;\n\n    a = v[0];\n    b = v[1];\n    c = v[2];\n    d 
= v[3];\n    e = v[4];\n    f = v[5];\n    g = v[6];\n    h = v[7];\n\n    while (blocks--) {\n        for (round = 0; round < 16; round++) {\n            l  = (unsigned int)data[0] << 24;\n            l |= (unsigned int)data[1] << 16;\n            l |= (unsigned int)data[2] << 8;\n            l |= (unsigned int)data[3];\n            data += 4;\n            T1 = X[round] = l;\n            T1 += h + Sigma1(e) + Ch(e, f, g) + K256[round];\n            T2 = Sigma0(a) + Maj(a, b, c);\n            h = g;\n            g = f;\n            f = e;\n            e = d + T1;\n            d = c;\n            c = b;\n            b = a;\n            a = T1 + T2;\n        }\n\n        for (; round < 64; round++) {\n            s0 = X[(round + 1) & 0x0f];\n            s0 = sigma0(s0);\n            s1 = X[(round + 14) & 0x0f];\n            s1 = sigma1(s1);\n\n            T1 = X[round & 0xf] += s0 + s1 + X[(round + 9) & 0xf];\n            T1 += h + Sigma1(e) + Ch(e, f, g) + K256[round];\n            T2 = Sigma0(a) + Maj(a, b, c);\n            h = g;\n            g = f;\n            f = e;\n            e = d + T1;\n            d = c;\n            c = b;\n            b = a;\n            a = T1 + T2;\n        }\n\n        a += v[0]; v[0] = a;\n        b += v[1]; v[1] = b;\n        c += v[2]; v[2] = c;\n        d += v[3]; v[3] = d;\n        e += v[4]; v[4] = e;\n        f += v[5]; v[5] = f;\n        g += v[6]; v[6] = g;\n        h += v[7]; v[7] = h;\n    }\n}\n#undef ROTR\n#undef Sigma0\n#undef Sigma1\n#undef sigma0\n#undef sigma1\n#undef Ch\n#undef Maj\n\nvoid blst_sha256_hcopy(unsigned int dst[8], const unsigned int src[8])\n{\n    size_t i;\n\n    for (i=0; i<8; i++)\n        dst[i] = src[i];\n}\n\nvoid blst_sha256_emit(unsigned char md[32], const unsigned int h[8])\n{\n    size_t i;\n\n    for (i=0; i<8; i++, md+=4) {\n        unsigned int h_i = h[i];\n        md[0] = (unsigned char)(h_i >> 24);\n        md[1] = (unsigned char)(h_i >> 16);\n        md[2] = (unsigned char)(h_i >> 
8);\n        md[3] = (unsigned char)h_i;\n    }\n}\n\nvoid blst_sha256_bcopy(void *dst_, const void *src_, size_t len)\n{\n    unsigned char *dst = dst_;\n    const unsigned char *src = src_;\n    size_t i;\n\n    for (i=0; i<len; i++)\n        dst[i] = src[i];\n}\n"
  },
  {
    "path": "src/pairing.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"point.h\"\n#include \"fields.h\"\n\n/*\n * Line evaluations from  https://eprint.iacr.org/2010/354.pdf\n * with a twist moving common expression to line_by_Px2.\n */\nstatic void line_add(vec384fp6 line, POINTonE2 *T, const POINTonE2 *R,\n                                                   const POINTonE2_affine *Q)\n{\n    vec384x Z1Z1, U2, S2, H, HH, I, J, V;\n#if 1\n# define r line[1]\n#else\n    vec384x r;\n#endif\n\n    /*\n     * https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl\n     * with XYZ3 being |T|, XYZ1 - |R|, XY2 - |Q|, i.e. Q is affine\n     */\n    sqr_fp2(Z1Z1, R->Z);                /* Z1Z1 = Z1^2 */\n    mul_fp2(U2, Q->X, Z1Z1);            /* U2 = X2*Z1Z1 */\n\n    mul_fp2(S2, Q->Y, R->Z);\n    mul_fp2(S2, S2, Z1Z1);              /* S2 = Y2*Z1*Z1Z1 */\n\n    sub_fp2(H, U2, R->X);               /* H = U2-X1 */\n\n    sqr_fp2(HH, H);                     /* HH = H^2 */\n    add_fp2(I, HH, HH);\n    add_fp2(I, I, I);                   /* I = 4*HH */\n\n    mul_fp2(J, H, I);                   /* J = H*I */\n\n    sub_fp2(r, S2, R->Y);\n    add_fp2(r, r, r);                   /* r = 2*(S2-Y1) */\n\n    mul_fp2(V, R->X, I);                /* V = X1*I */\n\n    sqr_fp2(T->X, r);\n    sub_fp2(T->X, T->X, J);\n    sub_fp2(T->X, T->X, V);\n    sub_fp2(T->X, T->X, V);             /* X3 = r^2-J-2*V */\n\n    mul_fp2(J, J, R->Y);\n    sub_fp2(T->Y, V, T->X);\n    mul_fp2(T->Y, T->Y, r);\n    sub_fp2(T->Y, T->Y, J);\n    sub_fp2(T->Y, T->Y, J);             /* Y3 = r*(V-X3)-2*Y1*J */\n\n    add_fp2(T->Z, R->Z, H);\n    sqr_fp2(T->Z, T->Z);\n    sub_fp2(T->Z, T->Z, Z1Z1);\n    sub_fp2(T->Z, T->Z, HH);            /* Z3 = (Z1+H)^2-Z1Z1-HH */\n\n    /*\n     * line evaluation\n     */\n    mul_fp2(I, r, Q->X);\n    mul_fp2(J, Q->Y, T->Z);\n    
sub_fp2(I, I, J);\n    add_fp2(line[0], I, I);          /* 2*(r*X2 - Y2*Z3) */\n#ifdef r\n# undef r\n#else\n    vec_copy(line[1], r, sizeof(r));\n#endif\n    vec_copy(line[2], T->Z, sizeof(T->Z));\n}\n\nstatic void line_dbl(vec384fp6 line, POINTonE2 *T, const POINTonE2 *Q)\n{\n    vec384x ZZ, A, B, C, D, E, F;\n\n    /*\n     * https://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-alnr\n     */\n    sqr_fp2(A, Q->X);                   /* A = X1^2 */\n    sqr_fp2(B, Q->Y);                   /* B = Y1^2 */\n    sqr_fp2(ZZ, Q->Z);                  /* ZZ = Z1^2 */\n    sqr_fp2(C, B);                      /* C = B^2 */\n\n    add_fp2(D, Q->X, B);                /* X1+B */\n    sqr_fp2(D, D);                      /* (X1+B)^2 */\n    sub_fp2(D, D, A);                   /* (X1+B)^2-A */\n    sub_fp2(D, D, C);                   /* (X1+B)^2-A-C */\n    add_fp2(D, D, D);                   /* D = 2*((X1+B)^2-A-C) */\n\n    mul_by_3_fp2(E, A);                 /* E = 3*A */\n    sqr_fp2(F, E);                      /* F = E^2 */\n\n    add_fp2(line[0], E, Q->X);          /* 3*A+X1 for line evaluation */\n\n    sub_fp2(T->X, F, D);\n    sub_fp2(T->X, T->X, D);             /* X3 = F-2*D */\n\n    add_fp2(T->Z, Q->Y, Q->Z);\n    sqr_fp2(T->Z, T->Z);\n    sub_fp2(T->Z, T->Z, B);\n    sub_fp2(T->Z, T->Z, ZZ);            /* Z3 = (Y1+Z1)^2-B-ZZ */\n\n    mul_by_8_fp2(C, C);                 /* 8*C */\n    sub_fp2(T->Y, D, T->X);             /* D-X3 */\n    mul_fp2(T->Y, T->Y, E);             /* E*(D-X3) */\n    sub_fp2(T->Y, T->Y, C);             /* Y3 = E*(D-X3)-8*C */\n\n    /*\n     * line evaluation\n     */\n    sqr_fp2(line[0], line[0]);\n    sub_fp2(line[0], line[0], A);\n    sub_fp2(line[0], line[0], F);       /* (3*A+X1)^2 - X1^2 - 9*A^2 */\n    lshift_fp2(B, B, 2);\n    sub_fp2(line[0], line[0], B);       /* 6*X1^3 - 4*Y1^2 */\n\n    mul_fp2(line[1], E, ZZ);            /* 3*X1^2 * Z1^2 */\n\n    mul_fp2(line[2], T->Z, ZZ);         /* Z3 * Z1^2 
*/\n}\n\nstatic void line_by_Px2(vec384fp6 line, const POINTonE1_affine *Px2)\n{\n    mul_fp(line[1][0], line[1][0], Px2->X);   /* \"b01\" *= -2*P->X */\n    mul_fp(line[1][1], line[1][1], Px2->X);\n\n    mul_fp(line[2][0], line[2][0], Px2->Y);   /* \"b11\" *= 2*P->Y */\n    mul_fp(line[2][1], line[2][1], Px2->Y);\n}\n\n#if 0\nstatic void add_n_dbl(vec384fp12 ret, POINTonE2 *T, const POINTonE2_affine *Q,\n                      const POINTonE1_affine *Px2, vec384fp6 line, size_t n)\n{\n    line_add(line, T, T, Q);    line_by_Px2(line, Px2);\n    mul_by_xy00z0_fp12(ret, ret, line);\n    while (n--) {\n        sqr_fp12(ret, ret);\n        line_dbl(line, T, T);   line_by_Px2(line, Px2);\n        mul_by_xy00z0_fp12(ret, ret, line);\n    }\n}\n\nstatic void miller_loop(vec384fp12 ret, const POINTonE2 *Q, const POINTonE1 *P)\n{\n#define Q ((const POINTonE2_affine *)Q)\n    POINTonE2 T[1];\n    POINTonE1_affine Px2[1];\n    vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, \"xy00z0\"  */\n\n    /* Move common expression from line evaluation to line_by_Px2. 
*/\n    add_fp(Px2->X, P->X, P->X);\n    neg_fp(Px2->X, Px2->X);\n    add_fp(Px2->Y, P->Y, P->Y);\n\n    vec_copy(T->X, Q->X, 2*sizeof(T->X));\n    vec_copy(T->Z, BLS12_381_Rx.p2, sizeof(T->Z));\n\n    /* first step is ret = 1^2*line, which is replaced with ret = line  */\n    line_dbl(line, T, T);                       /* 0x2                  */\n    line_by_Px2(line, Px2);\n    vec_zero(ret, sizeof(vec384fp12));\n    vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2));\n    vec_copy(ret[1][1], line[2], sizeof(vec384fp2));\n    add_n_dbl(ret, T, Q, Px2, line, 2);         /* ..0xc                */\n    add_n_dbl(ret, T, Q, Px2, line, 3);         /* ..0x68               */\n    add_n_dbl(ret, T, Q, Px2, line, 9);         /* ..0xd200             */\n    add_n_dbl(ret, T, Q, Px2, line, 32);        /* ..0xd20100000000     */\n    add_n_dbl(ret, T, Q, Px2, line, 16);        /* ..0xd201000000010000 */\n    conjugate_fp12(ret);                /* account for z being negative */\n#undef Q\n}\n#endif\n\nstatic void start_dbl_n(vec384fp12 ret, POINTonE2 T[],\n                                        const POINTonE1_affine Px2[], size_t n)\n{\n    size_t i;\n    vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, \"xy00z0\"  */\n\n    /* first step is ret = 1^2*line, which is replaced with ret = line  */\n    line_dbl(line, T+0, T+0);           line_by_Px2(line, Px2+0);\n    vec_zero(ret, sizeof(vec384fp12));\n    vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2));\n    vec_copy(ret[1][1], line[2], sizeof(vec384fp2));\n\n    for (i = 1; i < n; i++) {\n        line_dbl(line, T+i, T+i);       line_by_Px2(line, Px2+i);\n        mul_by_xy00z0_fp12(ret, ret, line);\n    }\n}\n\nstatic void add_n_dbl_n(vec384fp12 ret, POINTonE2 T[],\n                                        const POINTonE2_affine Q[],\n                                        const POINTonE1_affine Px2[],\n                                        size_t n, size_t k)\n{\n    size_t i;\n    vec384fp6 line; /* it's not 
actual fp6, but 3 packed fp2, \"xy00z0\"  */\n\n    for (i = 0; i < n; i++) {\n        line_add(line, T+i, T+i, Q+i);  line_by_Px2(line, Px2+i);\n        mul_by_xy00z0_fp12(ret, ret, line);\n    }\n    while (k--) {\n        sqr_fp12(ret, ret);\n        for (i = 0; i < n; i++) {\n            line_dbl(line, T+i, T+i);   line_by_Px2(line, Px2+i);\n            mul_by_xy00z0_fp12(ret, ret, line);\n        }\n    }\n}\n\nstatic void miller_loop_n(vec384fp12 ret, const POINTonE2_affine Q[],\n                                          const POINTonE1_affine P[], size_t n)\n{\n#if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901 \\\n                               || defined(__STDC_NO_VLA__)\n    POINTonE2 *T = alloca(n*sizeof(POINTonE2));\n    POINTonE1_affine *Px2 = alloca(n*sizeof(POINTonE1_affine));\n#else\n    POINTonE2 T[n];\n    POINTonE1_affine Px2[n];\n#endif\n    size_t i;\n\n    if ((n == 1) && (vec_is_zero(&Q[0], sizeof(Q[0])) |\n                     vec_is_zero(&P[0], sizeof(P[0]))) ) {\n        /*\n         * Special case of infinite aggregated signature, pair the additive\n         * group's identity with the multiplicative group's identity.\n         */\n        vec_copy(ret, BLS12_381_Rx.p12, sizeof(vec384fp12));\n        return;\n    }\n\n    for (i = 0; i < n; i++) {\n        /* Move common expression from line evaluation to line_by_Px2.  
*/\n        add_fp(Px2[i].X, P[i].X, P[i].X);\n        neg_fp(Px2[i].X, Px2[i].X);\n        add_fp(Px2[i].Y, P[i].Y, P[i].Y);\n\n        vec_copy(T[i].X, Q[i].X, 2*sizeof(T[i].X));\n        vec_copy(T[i].Z, BLS12_381_Rx.p2, sizeof(T[i].Z));\n    }\n\n    /* first step is ret = 1^2*line, which is replaced with ret = line  */\n    start_dbl_n(ret, T, Px2, n);                /* 0x2                  */\n    add_n_dbl_n(ret, T, Q, Px2, n, 2);          /* ..0xc                */\n    add_n_dbl_n(ret, T, Q, Px2, n, 3);          /* ..0x68               */\n    add_n_dbl_n(ret, T, Q, Px2, n, 9);          /* ..0xd200             */\n    add_n_dbl_n(ret, T, Q, Px2, n, 32);         /* ..0xd20100000000     */\n    add_n_dbl_n(ret, T, Q, Px2, n, 16);         /* ..0xd201000000010000 */\n    conjugate_fp12(ret);                /* account for z being negative */\n}\n\nstatic void pre_add_n_dbl(vec384fp6 lines[], POINTonE2 *T,\n                                             const POINTonE2_affine *Q,\n                                             size_t n)\n{\n    line_add(lines++[0], T, T, Q);\n    while (n--)\n        line_dbl(lines++[0], T, T);\n}\n\nstatic void precompute_lines(vec384fp6 Qlines[68], const POINTonE2_affine *Q)\n{\n    POINTonE2 T[1];\n\n    vec_copy(T->X, Q->X, 2*sizeof(T->X));\n    vec_copy(T->Z, BLS12_381_Rx.p2, sizeof(T->Z));\n\n    line_dbl(Qlines[0], T, T);                  /* 0x2                  */\n    pre_add_n_dbl(&Qlines[1],  T, Q, 2);        /* ..0xc                */\n    pre_add_n_dbl(&Qlines[4],  T, Q, 3);        /* ..0x68               */\n    pre_add_n_dbl(&Qlines[8],  T, Q, 9);        /* ..0xd200             */\n    pre_add_n_dbl(&Qlines[18], T, Q, 32);       /* ..0xd20100000000     */\n    pre_add_n_dbl(&Qlines[51], T, Q, 16);       /* ..0xd201000000010000 */\n}\n\nstatic void post_line_by_Px2(vec384fp6 out, const vec384fp6 in,\n                                            const POINTonE1_affine *Px2)\n{\n    vec_copy(out[0], in[0], 
sizeof(out[0]));\n\n    mul_fp(out[1][0], in[1][0], Px2->X);        /* \"b01\" *= -2*P->X */\n    mul_fp(out[1][1], in[1][1], Px2->X);\n\n    mul_fp(out[2][0], in[2][0], Px2->Y);        /* \"b11\" *= 2*P->Y */\n    mul_fp(out[2][1], in[2][1], Px2->Y);\n}\n\nstatic void post_add_n_dbl(vec384fp12 ret, const vec384fp6 lines[],\n                           const POINTonE1_affine *Px2, size_t n)\n{\n    vec384fp6 line;\n\n    post_line_by_Px2(line, lines++[0], Px2);\n    mul_by_xy00z0_fp12(ret, ret, line);\n    while (n--) {\n        sqr_fp12(ret, ret);\n        post_line_by_Px2(line, lines++[0], Px2);\n        mul_by_xy00z0_fp12(ret, ret, line);\n    }\n}\n\nstatic void miller_loop_lines(vec384fp12 ret, const vec384fp6 Qlines[68],\n                                              const POINTonE1_affine *P)\n{\n    POINTonE1_affine Px2[1];\n    vec384fp6 line; /* it's not actual fp6, but 3 packed fp2, \"xy00z0\"  */\n\n    /* Move common expression from line evaluation to line_by_Px2. */\n    add_fp(Px2->X, P->X, P->X);\n    neg_fp(Px2->X, Px2->X);\n    add_fp(Px2->Y, P->Y, P->Y);\n\n    /* first step is ret = 1^2*line, which is replaced with ret = line  */\n    post_line_by_Px2(line, Qlines[0], Px2);     /* 0x2                  */\n    vec_zero(ret, sizeof(vec384fp12));\n    vec_copy(ret[0][0], line[0], 2*sizeof(vec384fp2));\n    vec_copy(ret[1][1], line[2], sizeof(vec384fp2));\n    post_add_n_dbl(ret, &Qlines[1],  Px2, 2);   /* ..0xc                */\n    post_add_n_dbl(ret, &Qlines[4],  Px2, 3);   /* ..0x68               */\n    post_add_n_dbl(ret, &Qlines[8],  Px2, 9);   /* ..0xd200             */\n    post_add_n_dbl(ret, &Qlines[18], Px2, 32);  /* ..0xd20100000000     */\n    post_add_n_dbl(ret, &Qlines[51], Px2, 16);  /* ..0xd201000000010000 */\n    conjugate_fp12(ret);                /* account for z being negative */\n}\n\n#ifdef INTERNAL_TESTMODE\nstatic void miller_loop_alt(vec384fp12 ret, const POINTonE2_affine *Q,\n                                            
const POINTonE1_affine *P)\n{\n    vec384fp6 lines[68];\n\n    precompute_lines(lines, Q);\n    miller_loop_lines(ret, lines, P);\n}\n#endif\n\nstatic void mul_n_sqr(vec384fp12 ret, const vec384fp12 a, size_t n)\n{\n    mul_fp12(ret, ret, a);\n    while (n--)\n        cyclotomic_sqr_fp12(ret, ret);\n}\n\nstatic void raise_to_z_div_by_2(vec384fp12 ret, const vec384fp12 a)\n{\n    cyclotomic_sqr_fp12(ret, a);                /* 0x2                  */\n    mul_n_sqr(ret, a, 2);                       /* ..0xc                */\n    mul_n_sqr(ret, a, 3);                       /* ..0x68               */\n    mul_n_sqr(ret, a, 9);                       /* ..0xd200             */\n    mul_n_sqr(ret, a, 32);                      /* ..0xd20100000000     */\n    mul_n_sqr(ret, a, 16-1);                    /* ..0x6900800000008000 */\n    conjugate_fp12(ret);                /* account for z being negative */\n}\n\n#define raise_to_z(a, b) (raise_to_z_div_by_2(a, b), cyclotomic_sqr_fp12(a, a))\n\n/*\n * Adaptation from <zkcrypto>/pairing/src/bls12_381/mod.rs\n */\nstatic void final_exp(vec384fp12 ret, const vec384fp12 f)\n{\n    vec384fp12 y0, y1, y2, y3;\n\n    vec_copy(y1, f, sizeof(y1));\n    conjugate_fp12(y1);\n    inverse_fp12(y2, f);\n    mul_fp12(ret, y1, y2);\n    frobenius_map_fp12(y2, ret, 2);\n    mul_fp12(ret, ret, y2);\n\n    cyclotomic_sqr_fp12(y0, ret);\n    raise_to_z(y1, y0);\n    raise_to_z_div_by_2(y2, y1);\n    vec_copy(y3, ret, sizeof(y3));\n    conjugate_fp12(y3);\n    mul_fp12(y1, y1, y3);\n    conjugate_fp12(y1);\n    mul_fp12(y1, y1, y2);\n    raise_to_z(y2, y1);\n    raise_to_z(y3, y2);\n    conjugate_fp12(y1);\n    mul_fp12(y3, y3, y1);\n    conjugate_fp12(y1);\n    frobenius_map_fp12(y1, y1, 3);\n    frobenius_map_fp12(y2, y2, 2);\n    mul_fp12(y1, y1, y2);\n    raise_to_z(y2, y3);\n    mul_fp12(y2, y2, y0);\n    mul_fp12(y2, y2, ret);\n    mul_fp12(y1, y1, y2);\n    frobenius_map_fp12(y2, y3, 1);\n    mul_fp12(ret, y1, y2);\n}\n\nvoid 
blst_miller_loop(vec384fp12 ret, const POINTonE2_affine *Q,\n                                      const POINTonE1_affine *P)\n{   miller_loop_n(ret, Q ? Q : (const POINTonE2_affine *)&BLS12_381_G2,\n                       P ? P : (const POINTonE1_affine *)&BLS12_381_G1, 1);\n}\n\n#ifndef MILLER_LOOP_N_MAX\n# define MILLER_LOOP_N_MAX 16\n#endif\n\nvoid blst_miller_loop_n(vec384fp12 out, const POINTonE2_affine *const Qs[],\n                                        const POINTonE1_affine *const Ps[],\n                                        size_t n)\n{   /* ~10KB of stack storage */\n    POINTonE2 T[MILLER_LOOP_N_MAX];\n    POINTonE2_affine Q[MILLER_LOOP_N_MAX];\n    POINTonE1_affine Px2[MILLER_LOOP_N_MAX];\n    const POINTonE2_affine *Qptr = NULL;\n    const POINTonE1_affine *Pptr = NULL;\n    size_t i, j;\n\n    for (i = 0, j = 0; j < n; j++) {\n        Qptr = *Qs ? *Qs++ : Qptr+1;\n        Pptr = *Ps ? *Ps++ : Pptr+1;\n\n        /* Move common expression from line evaluation to line_by_Px2.  */\n        add_fp(Px2[i].X, Pptr->X, Pptr->X);\n        neg_fp(Px2[i].X, Px2[i].X);\n        add_fp(Px2[i].Y, Pptr->Y, Pptr->Y);\n\n        vec_copy(Q[i].X, Qptr->X, 2*sizeof(Q[i].X));\n        vec_copy(T[i].X, Qptr->X, 2*sizeof(T[i].X));\n        vec_copy(T[i].Z, BLS12_381_Rx.p2, sizeof(T[i].Z));\n\n        if (++i == MILLER_LOOP_N_MAX || j == n-1) {\n            vec384fp12 tmp;\n            vec384fp6 *ret = j < MILLER_LOOP_N_MAX ? 
out : tmp;\n\n            /* first step is ret = 1^2*line, which is just ret = line       */\n            start_dbl_n(ret, T, Px2, i);            /* 0x2                  */\n            add_n_dbl_n(ret, T, Q, Px2, i, 2);      /* ..0xc                */\n            add_n_dbl_n(ret, T, Q, Px2, i, 3);      /* ..0x68               */\n            add_n_dbl_n(ret, T, Q, Px2, i, 9);      /* ..0xd200             */\n            add_n_dbl_n(ret, T, Q, Px2, i, 32);     /* ..0xd20100000000     */\n            add_n_dbl_n(ret, T, Q, Px2, i, 16);     /* ..0xd201000000010000 */\n            conjugate_fp12(ret);            /* account for z being negative */\n\n            if (j >= MILLER_LOOP_N_MAX)\n                mul_fp12(out, out, ret);\n\n            i = 0;\n        }\n    }\n}\n\nvoid blst_final_exp(vec384fp12 ret, const vec384fp12 f)\n{   final_exp(ret, f);   }\n\nvoid blst_precompute_lines(vec384fp6 Qlines[68], const POINTonE2_affine *Q)\n{   precompute_lines(Qlines, Q);   }\n\nvoid blst_miller_loop_lines(vec384fp12 ret, const vec384fp6 Qlines[68],\n                                            const POINTonE1_affine *P)\n{   miller_loop_lines(ret, Qlines, P);   }\n\nstatic bool_t is_cyclotomic(const vec384fp12 f)\n{\n    vec384fp12 a, b;\n\n    frobenius_map_fp12(a, f, 2);\n    frobenius_map_fp12(b, a, 2);\n    mul_fp12(b, b, f);\n\n    return vec_is_equal(a, b, sizeof(a));\n}\n\nint blst_fp12_in_group(const vec384fp12 f)\n{\n    vec384fp12 a, b;\n\n    if (vec_is_zero(f, sizeof(vec384fp12)) || !is_cyclotomic(f))\n        return 0;\n\n    frobenius_map_fp12(a, f, 1);\n    raise_to_z(b, f);\n\n    return (int)vec_is_equal(a, b, sizeof(a));\n}\n"
  },
  {
    "path": "src/pentaroot-addchain.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n/*\n * The \"magic\" number is 1/5 modulo BLS12_381_r-1. Exponentiation to which\n * yields 5th root of the base.\n *\n * Generated with 'addchain 20974350070050476191779096203274386335076221000211055129041463479975432473805'\n * https://github.com/kwantam/addchain\n * # Bos-Coster (win=4)           :  307 (15)\n * # Bos-Coster (win=10)          :  307 (18)\n * # Yacobi                       :  319 (16)\n * # Bos-Coster (win=2)           :  319 ( 5)\n * # Bos-Coster (win=5)           :  306 (19) <<<\n * # Bos-Coster (win=7)           :  311 (22)\n * # Bos-Coster (win=9)           :  313 (20)\n * # Bos-Coster (win=3)           :  314 ( 9)\n * # Bos-Coster (win=6)           :  309 (21)\n * # Bos-Coster (win=8)           :  309 (23)\n * # Bergeron-Berstel-Brlek-Duboc :  334 ( 5)\n */\n\n#define PENTAROOT_MOD_BLS12_381_r(out, inp, ptype) do { \\\nptype t[19]; \\\nvec_copy(t[1], inp, sizeof(ptype)); /*    0: 1 */\\\nsqr(t[7], t[1]);                    /*    1: 2 */\\\nsqr(t[0], t[7]);                    /*    2: 4 */\\\nsqr(t[2], t[0]);                    /*    3: 8 */\\\nmul(t[10], t[2], t[1]);             /*    4: 9 */\\\nmul(t[3], t[10], t[7]);             /*    5: b */\\\nmul(t[1], t[10], t[0]);             /*    6: d */\\\nmul(t[5], t[3], t[0]);              /*    7: f */\\\nmul(t[9], t[10], t[2]);             /*    8: 11 */\\\nmul(t[4], t[3], t[2]);              /*    9: 13 */\\\nmul(t[15], t[5], t[2]);             /*   10: 17 */\\\nmul(t[8], t[15], t[2]);             /*   11: 1f */\\\nmul(t[13], t[8], t[7]);             /*   12: 21 */\\\nmul(t[14], t[8], t[0]);             /*   13: 23 */\\\nmul(t[12], t[13], t[0]);            /*   14: 25 */\\\nmul(t[6], t[8], t[2]);              /*   15: 27 */\\\nmul(t[11], t[14], t[2]);            /*   16: 2b */\\\nsqr(t[0], t[15]);                   /*   
17: 2e */\\\nmul(t[18], t[6], t[2]);             /*   18: 2f */\\\nmul(t[2], t[11], t[2]);             /*   19: 33 */\\\nmul(t[16], t[2], t[7]);             /*   20: 35 */\\\nmul(t[7], t[0], t[3]);              /*   21: 39 */\\\nmul(t[17], t[0], t[5]);             /*   22: 3d */\\\n/* sqr(t[0], t[0]); */              /*   23: 5c */\\\n/* sqr(t[0], t[0]); */              /*   24: b8 */\\\n/* sqr(t[0], t[0]); */              /*   25: 170 */\\\n/* sqr(t[0], t[0]); */              /*   26: 2e0 */\\\n/* sqr(t[0], t[0]); */              /*   27: 5c0 */\\\n/* sqr(t[0], t[0]); */              /*   28: b80 */\\\n/* sqr(t[0], t[0]); */              /*   29: 1700 */\\\nsqr_n_mul(t[0], t[0], 7, t[18]);    /*   30: 172f */\\\n/* sqr(t[0], t[0]); */              /*   31: 2e5e */\\\n/* sqr(t[0], t[0]); */              /*   32: 5cbc */\\\n/* sqr(t[0], t[0]); */              /*   33: b978 */\\\n/* sqr(t[0], t[0]); */              /*   34: 172f0 */\\\n/* sqr(t[0], t[0]); */              /*   35: 2e5e0 */\\\n/* sqr(t[0], t[0]); */              /*   36: 5cbc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[13]);    /*   37: 5cbe1 */\\\n/* sqr(t[0], t[0]); */              /*   38: b97c2 */\\\n/* sqr(t[0], t[0]); */              /*   39: 172f84 */\\\n/* sqr(t[0], t[0]); */              /*   40: 2e5f08 */\\\n/* sqr(t[0], t[0]); */              /*   41: 5cbe10 */\\\n/* sqr(t[0], t[0]); */              /*   42: b97c20 */\\\n/* sqr(t[0], t[0]); */              /*   43: 172f840 */\\\nsqr_n_mul(t[0], t[0], 6, t[17]);    /*   44: 172f87d */\\\n/* sqr(t[0], t[0]); */              /*   45: 2e5f0fa */\\\n/* sqr(t[0], t[0]); */              /*   46: 5cbe1f4 */\\\n/* sqr(t[0], t[0]); */              /*   47: b97c3e8 */\\\n/* sqr(t[0], t[0]); */              /*   48: 172f87d0 */\\\n/* sqr(t[0], t[0]); */              /*   49: 2e5f0fa0 */\\\n/* sqr(t[0], t[0]); */              /*   50: 5cbe1f40 */\\\nsqr_n_mul(t[0], t[0], 6, t[16]);    /*   51: 5cbe1f75 */\\\n/* sqr(t[0], t[0]); */              /*   52: b97c3eea 
*/\\\n/* sqr(t[0], t[0]); */              /*   53: 172f87dd4 */\\\n/* sqr(t[0], t[0]); */              /*   54: 2e5f0fba8 */\\\n/* sqr(t[0], t[0]); */              /*   55: 5cbe1f750 */\\\n/* sqr(t[0], t[0]); */              /*   56: b97c3eea0 */\\\nsqr_n_mul(t[0], t[0], 5, t[15]);    /*   57: b97c3eeb7 */\\\n/* sqr(t[0], t[0]); */              /*   58: 172f87dd6e */\\\n/* sqr(t[0], t[0]); */              /*   59: 2e5f0fbadc */\\\n/* sqr(t[0], t[0]); */              /*   60: 5cbe1f75b8 */\\\n/* sqr(t[0], t[0]); */              /*   61: b97c3eeb70 */\\\n/* sqr(t[0], t[0]); */              /*   62: 172f87dd6e0 */\\\n/* sqr(t[0], t[0]); */              /*   63: 2e5f0fbadc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[15]);    /*   64: 2e5f0fbadd7 */\\\n/* sqr(t[0], t[0]); */              /*   65: 5cbe1f75bae */\\\n/* sqr(t[0], t[0]); */              /*   66: b97c3eeb75c */\\\n/* sqr(t[0], t[0]); */              /*   67: 172f87dd6eb8 */\\\n/* sqr(t[0], t[0]); */              /*   68: 2e5f0fbadd70 */\\\n/* sqr(t[0], t[0]); */              /*   69: 5cbe1f75bae0 */\\\n/* sqr(t[0], t[0]); */              /*   70: b97c3eeb75c0 */\\\n/* sqr(t[0], t[0]); */              /*   71: 172f87dd6eb80 */\\\n/* sqr(t[0], t[0]); */              /*   72: 2e5f0fbadd700 */\\\nsqr_n_mul(t[0], t[0], 8, t[14]);    /*   73: 2e5f0fbadd723 */\\\n/* sqr(t[0], t[0]); */              /*   74: 5cbe1f75bae46 */\\\n/* sqr(t[0], t[0]); */              /*   75: b97c3eeb75c8c */\\\n/* sqr(t[0], t[0]); */              /*   76: 172f87dd6eb918 */\\\n/* sqr(t[0], t[0]); */              /*   77: 2e5f0fbadd7230 */\\\n/* sqr(t[0], t[0]); */              /*   78: 5cbe1f75bae460 */\\\n/* sqr(t[0], t[0]); */              /*   79: b97c3eeb75c8c0 */\\\n/* sqr(t[0], t[0]); */              /*   80: 172f87dd6eb9180 */\\\n/* sqr(t[0], t[0]); */              /*   81: 2e5f0fbadd72300 */\\\nsqr_n_mul(t[0], t[0], 8, t[13]);    /*   82: 2e5f0fbadd72321 */\\\n/* sqr(t[0], t[0]); */              /*   83: 5cbe1f75bae4642 */\\\n/* sqr(t[0], 
t[0]); */              /*   84: b97c3eeb75c8c84 */\\\n/* sqr(t[0], t[0]); */              /*   85: 172f87dd6eb91908 */\\\n/* sqr(t[0], t[0]); */              /*   86: 2e5f0fbadd723210 */\\\n/* sqr(t[0], t[0]); */              /*   87: 5cbe1f75bae46420 */\\\n/* sqr(t[0], t[0]); */              /*   88: b97c3eeb75c8c840 */\\\nsqr_n_mul(t[0], t[0], 6, t[2]);     /*   89: b97c3eeb75c8c873 */\\\n/* sqr(t[0], t[0]); */              /*   90: 172f87dd6eb9190e6 */\\\n/* sqr(t[0], t[0]); */              /*   91: 2e5f0fbadd72321cc */\\\n/* sqr(t[0], t[0]); */              /*   92: 5cbe1f75bae464398 */\\\n/* sqr(t[0], t[0]); */              /*   93: b97c3eeb75c8c8730 */\\\n/* sqr(t[0], t[0]); */              /*   94: 172f87dd6eb9190e60 */\\\n/* sqr(t[0], t[0]); */              /*   95: 2e5f0fbadd72321cc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[13]);    /*   96: 2e5f0fbadd72321ce1 */\\\n/* sqr(t[0], t[0]); */              /*   97: 5cbe1f75bae46439c2 */\\\n/* sqr(t[0], t[0]); */              /*   98: b97c3eeb75c8c87384 */\\\n/* sqr(t[0], t[0]); */              /*   99: 172f87dd6eb9190e708 */\\\n/* sqr(t[0], t[0]); */              /*  100: 2e5f0fbadd72321ce10 */\\\n/* sqr(t[0], t[0]); */              /*  101: 5cbe1f75bae46439c20 */\\\n/* sqr(t[0], t[0]); */              /*  102: b97c3eeb75c8c873840 */\\\n/* sqr(t[0], t[0]); */              /*  103: 172f87dd6eb9190e7080 */\\\nsqr_n_mul(t[0], t[0], 7, t[12]);    /*  104: 172f87dd6eb9190e70a5 */\\\n/* sqr(t[0], t[0]); */              /*  105: 2e5f0fbadd72321ce14a */\\\n/* sqr(t[0], t[0]); */              /*  106: 5cbe1f75bae46439c294 */\\\n/* sqr(t[0], t[0]); */              /*  107: b97c3eeb75c8c8738528 */\\\n/* sqr(t[0], t[0]); */              /*  108: 172f87dd6eb9190e70a50 */\\\n/* sqr(t[0], t[0]); */              /*  109: 2e5f0fbadd72321ce14a0 */\\\n/* sqr(t[0], t[0]); */              /*  110: 5cbe1f75bae46439c2940 */\\\n/* sqr(t[0], t[0]); */              /*  111: b97c3eeb75c8c87385280 */\\\n/* sqr(t[0], t[0]); */              /*  
112: 172f87dd6eb9190e70a500 */\\\nsqr_n_mul(t[0], t[0], 8, t[11]);    /*  113: 172f87dd6eb9190e70a52b */\\\n/* sqr(t[0], t[0]); */              /*  114: 2e5f0fbadd72321ce14a56 */\\\n/* sqr(t[0], t[0]); */              /*  115: 5cbe1f75bae46439c294ac */\\\n/* sqr(t[0], t[0]); */              /*  116: b97c3eeb75c8c873852958 */\\\n/* sqr(t[0], t[0]); */              /*  117: 172f87dd6eb9190e70a52b0 */\\\n/* sqr(t[0], t[0]); */              /*  118: 2e5f0fbadd72321ce14a560 */\\\n/* sqr(t[0], t[0]); */              /*  119: 5cbe1f75bae46439c294ac0 */\\\nsqr_n_mul(t[0], t[0], 6, t[1]);     /*  120: 5cbe1f75bae46439c294acd */\\\n/* sqr(t[0], t[0]); */              /*  121: b97c3eeb75c8c873852959a */\\\n/* sqr(t[0], t[0]); */              /*  122: 172f87dd6eb9190e70a52b34 */\\\n/* sqr(t[0], t[0]); */              /*  123: 2e5f0fbadd72321ce14a5668 */\\\n/* sqr(t[0], t[0]); */              /*  124: 5cbe1f75bae46439c294acd0 */\\\n/* sqr(t[0], t[0]); */              /*  125: b97c3eeb75c8c873852959a0 */\\\n/* sqr(t[0], t[0]); */              /*  126: 172f87dd6eb9190e70a52b340 */\\\n/* sqr(t[0], t[0]); */              /*  127: 2e5f0fbadd72321ce14a56680 */\\\n/* sqr(t[0], t[0]); */              /*  128: 5cbe1f75bae46439c294acd00 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  129: 5cbe1f75bae46439c294acd33 */\\\n/* sqr(t[0], t[0]); */              /*  130: b97c3eeb75c8c873852959a66 */\\\n/* sqr(t[0], t[0]); */              /*  131: 172f87dd6eb9190e70a52b34cc */\\\n/* sqr(t[0], t[0]); */              /*  132: 2e5f0fbadd72321ce14a566998 */\\\n/* sqr(t[0], t[0]); */              /*  133: 5cbe1f75bae46439c294acd330 */\\\n/* sqr(t[0], t[0]); */              /*  134: b97c3eeb75c8c873852959a660 */\\\n/* sqr(t[0], t[0]); */              /*  135: 172f87dd6eb9190e70a52b34cc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[11]);    /*  136: 172f87dd6eb9190e70a52b34ceb */\\\n/* sqr(t[0], t[0]); */              /*  137: 2e5f0fbadd72321ce14a56699d6 */\\\n/* sqr(t[0], t[0]); */              /*  138: 
5cbe1f75bae46439c294acd33ac */\\\n/* sqr(t[0], t[0]); */              /*  139: b97c3eeb75c8c873852959a6758 */\\\n/* sqr(t[0], t[0]); */              /*  140: 172f87dd6eb9190e70a52b34ceb0 */\\\nsqr_n_mul(t[0], t[0], 4, t[10]);    /*  141: 172f87dd6eb9190e70a52b34ceb9 */\\\n/* sqr(t[0], t[0]); */              /*  142: 2e5f0fbadd72321ce14a56699d72 */\\\n/* sqr(t[0], t[0]); */              /*  143: 5cbe1f75bae46439c294acd33ae4 */\\\n/* sqr(t[0], t[0]); */              /*  144: b97c3eeb75c8c873852959a675c8 */\\\n/* sqr(t[0], t[0]); */              /*  145: 172f87dd6eb9190e70a52b34ceb90 */\\\n/* sqr(t[0], t[0]); */              /*  146: 2e5f0fbadd72321ce14a56699d720 */\\\nsqr_n_mul(t[0], t[0], 5, t[8]);     /*  147: 2e5f0fbadd72321ce14a56699d73f */\\\n/* sqr(t[0], t[0]); */              /*  148: 5cbe1f75bae46439c294acd33ae7e */\\\n/* sqr(t[0], t[0]); */              /*  149: b97c3eeb75c8c873852959a675cfc */\\\n/* sqr(t[0], t[0]); */              /*  150: 172f87dd6eb9190e70a52b34ceb9f8 */\\\n/* sqr(t[0], t[0]); */              /*  151: 2e5f0fbadd72321ce14a56699d73f0 */\\\n/* sqr(t[0], t[0]); */              /*  152: 5cbe1f75bae46439c294acd33ae7e0 */\\\n/* sqr(t[0], t[0]); */              /*  153: b97c3eeb75c8c873852959a675cfc0 */\\\n/* sqr(t[0], t[0]); */              /*  154: 172f87dd6eb9190e70a52b34ceb9f80 */\\\n/* sqr(t[0], t[0]); */              /*  155: 2e5f0fbadd72321ce14a56699d73f00 */\\\n/* sqr(t[0], t[0]); */              /*  156: 5cbe1f75bae46439c294acd33ae7e00 */\\\n/* sqr(t[0], t[0]); */              /*  157: b97c3eeb75c8c873852959a675cfc00 */\\\n/* sqr(t[0], t[0]); */              /*  158: 172f87dd6eb9190e70a52b34ceb9f800 */\\\n/* sqr(t[0], t[0]); */              /*  159: 2e5f0fbadd72321ce14a56699d73f000 */\\\n/* sqr(t[0], t[0]); */              /*  160: 5cbe1f75bae46439c294acd33ae7e000 */\\\n/* sqr(t[0], t[0]); */              /*  161: b97c3eeb75c8c873852959a675cfc000 */\\\n/* sqr(t[0], t[0]); */              /*  162: 172f87dd6eb9190e70a52b34ceb9f8000 
*/\\\nsqr_n_mul(t[0], t[0], 15, t[9]);    /*  163: 172f87dd6eb9190e70a52b34ceb9f8011 */\\\n/* sqr(t[0], t[0]); */              /*  164: 2e5f0fbadd72321ce14a56699d73f0022 */\\\n/* sqr(t[0], t[0]); */              /*  165: 5cbe1f75bae46439c294acd33ae7e0044 */\\\n/* sqr(t[0], t[0]); */              /*  166: b97c3eeb75c8c873852959a675cfc0088 */\\\n/* sqr(t[0], t[0]); */              /*  167: 172f87dd6eb9190e70a52b34ceb9f80110 */\\\n/* sqr(t[0], t[0]); */              /*  168: 2e5f0fbadd72321ce14a56699d73f00220 */\\\n/* sqr(t[0], t[0]); */              /*  169: 5cbe1f75bae46439c294acd33ae7e00440 */\\\n/* sqr(t[0], t[0]); */              /*  170: b97c3eeb75c8c873852959a675cfc00880 */\\\n/* sqr(t[0], t[0]); */              /*  171: 172f87dd6eb9190e70a52b34ceb9f801100 */\\\nsqr_n_mul(t[0], t[0], 8, t[3]);     /*  172: 172f87dd6eb9190e70a52b34ceb9f80110b */\\\n/* sqr(t[0], t[0]); */              /*  173: 2e5f0fbadd72321ce14a56699d73f002216 */\\\n/* sqr(t[0], t[0]); */              /*  174: 5cbe1f75bae46439c294acd33ae7e00442c */\\\n/* sqr(t[0], t[0]); */              /*  175: b97c3eeb75c8c873852959a675cfc008858 */\\\n/* sqr(t[0], t[0]); */              /*  176: 172f87dd6eb9190e70a52b34ceb9f80110b0 */\\\n/* sqr(t[0], t[0]); */              /*  177: 2e5f0fbadd72321ce14a56699d73f0022160 */\\\nsqr_n_mul(t[0], t[0], 5, t[8]);     /*  178: 2e5f0fbadd72321ce14a56699d73f002217f */\\\n/* sqr(t[0], t[0]); */              /*  179: 5cbe1f75bae46439c294acd33ae7e00442fe */\\\n/* sqr(t[0], t[0]); */              /*  180: b97c3eeb75c8c873852959a675cfc00885fc */\\\n/* sqr(t[0], t[0]); */              /*  181: 172f87dd6eb9190e70a52b34ceb9f80110bf8 */\\\n/* sqr(t[0], t[0]); */              /*  182: 2e5f0fbadd72321ce14a56699d73f002217f0 */\\\n/* sqr(t[0], t[0]); */              /*  183: 5cbe1f75bae46439c294acd33ae7e00442fe0 */\\\n/* sqr(t[0], t[0]); */              /*  184: b97c3eeb75c8c873852959a675cfc00885fc0 */\\\n/* sqr(t[0], t[0]); */              /*  185: 
172f87dd6eb9190e70a52b34ceb9f80110bf80 */\\\n/* sqr(t[0], t[0]); */              /*  186: 2e5f0fbadd72321ce14a56699d73f002217f00 */\\\n/* sqr(t[0], t[0]); */              /*  187: 5cbe1f75bae46439c294acd33ae7e00442fe00 */\\\n/* sqr(t[0], t[0]); */              /*  188: b97c3eeb75c8c873852959a675cfc00885fc00 */\\\nsqr_n_mul(t[0], t[0], 10, t[7]);    /*  189: b97c3eeb75c8c873852959a675cfc00885fc39 */\\\n/* sqr(t[0], t[0]); */              /*  190: 172f87dd6eb9190e70a52b34ceb9f80110bf872 */\\\n/* sqr(t[0], t[0]); */              /*  191: 2e5f0fbadd72321ce14a56699d73f002217f0e4 */\\\n/* sqr(t[0], t[0]); */              /*  192: 5cbe1f75bae46439c294acd33ae7e00442fe1c8 */\\\n/* sqr(t[0], t[0]); */              /*  193: b97c3eeb75c8c873852959a675cfc00885fc390 */\\\n/* sqr(t[0], t[0]); */              /*  194: 172f87dd6eb9190e70a52b34ceb9f80110bf8720 */\\\n/* sqr(t[0], t[0]); */              /*  195: 2e5f0fbadd72321ce14a56699d73f002217f0e40 */\\\nsqr_n_mul(t[0], t[0], 6, t[6]);     /*  196: 2e5f0fbadd72321ce14a56699d73f002217f0e67 */\\\n/* sqr(t[0], t[0]); */              /*  197: 5cbe1f75bae46439c294acd33ae7e00442fe1cce */\\\n/* sqr(t[0], t[0]); */              /*  198: b97c3eeb75c8c873852959a675cfc00885fc399c */\\\n/* sqr(t[0], t[0]); */              /*  199: 172f87dd6eb9190e70a52b34ceb9f80110bf87338 */\\\n/* sqr(t[0], t[0]); */              /*  200: 2e5f0fbadd72321ce14a56699d73f002217f0e670 */\\\n/* sqr(t[0], t[0]); */              /*  201: 5cbe1f75bae46439c294acd33ae7e00442fe1cce0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  202: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3 */\\\n/* sqr(t[0], t[0]); */              /*  203: b97c3eeb75c8c873852959a675cfc00885fc399e6 */\\\n/* sqr(t[0], t[0]); */              /*  204: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cc */\\\n/* sqr(t[0], t[0]); */              /*  205: 2e5f0fbadd72321ce14a56699d73f002217f0e6798 */\\\n/* sqr(t[0], t[0]); */              /*  206: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf30 */\\\n/* sqr(t[0], t[0]); */  
            /*  207: b97c3eeb75c8c873852959a675cfc00885fc399e60 */\\\n/* sqr(t[0], t[0]); */              /*  208: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cc0 */\\\n/* sqr(t[0], t[0]); */              /*  209: 2e5f0fbadd72321ce14a56699d73f002217f0e67980 */\\\n/* sqr(t[0], t[0]); */              /*  210: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  211: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf333 */\\\n/* sqr(t[0], t[0]); */              /*  212: b97c3eeb75c8c873852959a675cfc00885fc399e666 */\\\n/* sqr(t[0], t[0]); */              /*  213: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc */\\\n/* sqr(t[0], t[0]); */              /*  214: 2e5f0fbadd72321ce14a56699d73f002217f0e679998 */\\\n/* sqr(t[0], t[0]); */              /*  215: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3330 */\\\n/* sqr(t[0], t[0]); */              /*  216: b97c3eeb75c8c873852959a675cfc00885fc399e6660 */\\\n/* sqr(t[0], t[0]); */              /*  217: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc0 */\\\n/* sqr(t[0], t[0]); */              /*  218: 2e5f0fbadd72321ce14a56699d73f002217f0e6799980 */\\\nsqr_n_mul(t[0], t[0], 7, t[5]);     /*  219: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f */\\\n/* sqr(t[0], t[0]); */              /*  220: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e */\\\n/* sqr(t[0], t[0]); */              /*  221: b97c3eeb75c8c873852959a675cfc00885fc399e6663c */\\\n/* sqr(t[0], t[0]); */              /*  222: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78 */\\\n/* sqr(t[0], t[0]); */              /*  223: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f0 */\\\n/* sqr(t[0], t[0]); */              /*  224: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e0 */\\\n/* sqr(t[0], t[0]); */              /*  225: b97c3eeb75c8c873852959a675cfc00885fc399e6663c0 */\\\n/* sqr(t[0], t[0]); */              /*  226: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc780 */\\\n/* sqr(t[0], t[0]); */              /*  227: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f00 
*/\\\n/* sqr(t[0], t[0]); */              /*  228: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e00 */\\\nsqr_n_mul(t[0], t[0], 9, t[2]);     /*  229: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33 */\\\n/* sqr(t[0], t[0]); */              /*  230: b97c3eeb75c8c873852959a675cfc00885fc399e6663c66 */\\\n/* sqr(t[0], t[0]); */              /*  231: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc */\\\n/* sqr(t[0], t[0]); */              /*  232: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f198 */\\\n/* sqr(t[0], t[0]); */              /*  233: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e330 */\\\n/* sqr(t[0], t[0]); */              /*  234: b97c3eeb75c8c873852959a675cfc00885fc399e6663c660 */\\\n/* sqr(t[0], t[0]); */              /*  235: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc0 */\\\n/* sqr(t[0], t[0]); */              /*  236: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1980 */\\\nsqr_n_mul(t[0], t[0], 7, t[4]);     /*  237: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993 */\\\n/* sqr(t[0], t[0]); */              /*  238: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326 */\\\n/* sqr(t[0], t[0]); */              /*  239: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664c */\\\n/* sqr(t[0], t[0]); */              /*  240: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc98 */\\\n/* sqr(t[0], t[0]); */              /*  241: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19930 */\\\n/* sqr(t[0], t[0]); */              /*  242: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33260 */\\\n/* sqr(t[0], t[0]); */              /*  243: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664c0 */\\\n/* sqr(t[0], t[0]); */              /*  244: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc980 */\\\n/* sqr(t[0], t[0]); */              /*  245: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  246: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199333 */\\\n/* sqr(t[0], t[0]); */              /*  247: 
5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666 */\\\n/* sqr(t[0], t[0]); */              /*  248: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccc */\\\n/* sqr(t[0], t[0]); */              /*  249: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9998 */\\\n/* sqr(t[0], t[0]); */              /*  250: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993330 */\\\n/* sqr(t[0], t[0]); */              /*  251: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326660 */\\\n/* sqr(t[0], t[0]); */              /*  252: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccc0 */\\\n/* sqr(t[0], t[0]); */              /*  253: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99980 */\\\n/* sqr(t[0], t[0]); */              /*  254: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  255: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333 */\\\n/* sqr(t[0], t[0]); */              /*  256: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666 */\\\n/* sqr(t[0], t[0]); */              /*  257: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccc */\\\n/* sqr(t[0], t[0]); */              /*  258: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999998 */\\\n/* sqr(t[0], t[0]); */              /*  259: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f199333330 */\\\n/* sqr(t[0], t[0]); */              /*  260: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666660 */\\\n/* sqr(t[0], t[0]); */              /*  261: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccc0 */\\\n/* sqr(t[0], t[0]); */              /*  262: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999980 */\\\n/* sqr(t[0], t[0]); */              /*  263: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993333300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  264: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f1993333333 */\\\n/* sqr(t[0], t[0]); */              /*  265: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666 */\\\n/* sqr(t[0], t[0]); */              /*  266: 
b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccccc */\\\n/* sqr(t[0], t[0]); */              /*  267: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999998 */\\\n/* sqr(t[0], t[0]); */              /*  268: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333330 */\\\n/* sqr(t[0], t[0]); */              /*  269: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666660 */\\\n/* sqr(t[0], t[0]); */              /*  270: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664ccccccc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*  271: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb */\\\n/* sqr(t[0], t[0]); */              /*  272: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996 */\\\n/* sqr(t[0], t[0]); */              /*  273: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332c */\\\n/* sqr(t[0], t[0]); */              /*  274: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666658 */\\\n/* sqr(t[0], t[0]); */              /*  275: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb0 */\\\n/* sqr(t[0], t[0]); */              /*  276: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999960 */\\\n/* sqr(t[0], t[0]); */              /*  277: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332c0 */\\\n/* sqr(t[0], t[0]); */              /*  278: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666580 */\\\n/* sqr(t[0], t[0]); */              /*  279: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb00 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  280: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb33 */\\\n/* sqr(t[0], t[0]); */              /*  281: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999999666 */\\\n/* sqr(t[0], t[0]); */              /*  282: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccc */\\\n/* sqr(t[0], t[0]); */              /*  283: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666665998 */\\\n/* sqr(t[0], t[0]); */              /*  284: 
b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb330 */\\\n/* sqr(t[0], t[0]); */              /*  285: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996660 */\\\n/* sqr(t[0], t[0]); */              /*  286: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccc0 */\\\n/* sqr(t[0], t[0]); */              /*  287: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666659980 */\\\n/* sqr(t[0], t[0]); */              /*  288: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  289: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3333 */\\\n/* sqr(t[0], t[0]); */              /*  290: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999966666 */\\\n/* sqr(t[0], t[0]); */              /*  291: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccc */\\\n/* sqr(t[0], t[0]); */              /*  292: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e3326666666599998 */\\\n/* sqr(t[0], t[0]); */              /*  293: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb33330 */\\\n/* sqr(t[0], t[0]); */              /*  294: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc99999999666660 */\\\n/* sqr(t[0], t[0]); */              /*  295: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccc0 */\\\n/* sqr(t[0], t[0]); */              /*  296: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e33266666665999980 */\\\n/* sqr(t[0], t[0]); */              /*  297: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb333300 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  298: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb333333 */\\\n/* sqr(t[0], t[0]); */              /*  299: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc999999996666666 */\\\n/* sqr(t[0], t[0]); */              /*  300: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccccc */\\\n/* sqr(t[0], t[0]); */              /*  301: 5cbe1f75bae46439c294acd33ae7e00442fe1ccf3331e332666666659999998 */\\\n/* 
sqr(t[0], t[0]); */              /*  302: b97c3eeb75c8c873852959a675cfc00885fc399e6663c664cccccccb3333330 */\\\n/* sqr(t[0], t[0]); */              /*  303: 172f87dd6eb9190e70a52b34ceb9f80110bf8733cccc78cc9999999966666660 */\\\n/* sqr(t[0], t[0]); */              /*  304: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332ccccccc0 */\\\nsqr_n_mul(out, t[0], 6, t[1]);      /*  305: 2e5f0fbadd72321ce14a56699d73f002217f0e679998f19933333332cccccccd */\\\n} while(0)\n"
  },
  {
    "path": "src/pentaroot.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n\nstatic inline void mul_fr(vec256 ret, const vec256 a, const vec256 b)\n{   mul_mont_sparse_256(ret, a, b, BLS12_381_r, r0);   }\n\nstatic inline void sqr_fr(vec256 ret, const vec256 a)\n{   sqr_mont_sparse_256(ret, a, BLS12_381_r, r0);   }\n\n#ifdef __OPTIMIZE_SIZE__\nvoid blst_fr_pentaroot(vec256 out, const vec256 inp)\n{\n    static const byte pow[] = {\n        TO_BYTES(0x33333332cccccccd), TO_BYTES(0x217f0e679998f199),\n        TO_BYTES(0xe14a56699d73f002), TO_BYTES(0x2e5f0fbadd72321c)\n    };\n    size_t pow_bits = 254;\n    vec256 ret;\n\n    vec_copy(ret, inp, sizeof(ret));  /* ret = inp^1 */\n    --pow_bits; /* most significant bit is set, skip over */\n    while (pow_bits--) {\n        sqr_fr(ret, ret);\n        if (is_bit_set(pow, pow_bits))\n            mul_fr(ret, ret, inp);\n    }\n    vec_copy(out, ret, sizeof(ret));  /* out = ret */\n}\n#else\n# if 0\n/*\n * \"255\"-bit variant omits full reductions at the ends of squarings,\n * not implemented yet[?].\n */\nstatic inline void sqr_n_mul_fr(vec256 out, const vec256 a, size_t count,\n                                const vec256 b)\n{   sqr_n_mul_mont_255(out, a, count, BLS12_381_r, r0, b);   }\n# else\nstatic void sqr_n_mul_fr(vec256 out, const vec256 a, size_t count,\n                         const vec256 b)\n{\n    do {\n        sqr_fr(out, a);\n        a = out;\n    } while (--count);\n    mul_fr(out, out, b);\n}\n# endif\n\n# define sqr(ret,a)\t\tsqr_fr(ret,a)\n# define mul(ret,a,b)\t\tmul_fr(ret,a,b)\n# define sqr_n_mul(ret,a,n,b)\tsqr_n_mul_fr(ret,a,n,b)\n\n# include \"pentaroot-addchain.h\"\nvoid blst_fr_pentaroot(vec256 out, const vec256 inp)\n{   PENTAROOT_MOD_BLS12_381_r(out, inp, vec256);   }\n# undef PENTAROOT_MOD_BLS12_381_r\n\n# undef sqr_n_mul\n# undef sqr\n# undef mul\n#endif\n\nvoid 
blst_fr_pentapow(vec256 out, const vec256 inp)\n{\n    vec256 tmp;\n\n    sqr_fr(tmp, inp);\n    sqr_fr(tmp, tmp);\n    mul_fr(out, tmp, inp);\n}\n"
  },
  {
    "path": "src/point.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_POINT_H__\n#define __BLS12_381_ASM_POINT_H__\n\n#include \"vect.h\"\n#include \"bytes.h\"\n\n#define DECLARE_POINT(ptype, bits) \\\ntypedef struct { vec##bits X,Y,Z; } ptype; \\\ntypedef struct { vec##bits X,Y; } ptype##_affine; \\\n\\\nstatic void ptype##_dadd(ptype *out, const ptype *p1, const ptype *p2,\t\\\n                         const vec##bits a4);\t\t\t\t\\\nstatic void ptype##_dadd_affine(ptype *out, const ptype *p1,\t\t\\\n                                            const ptype##_affine *p2);\t\\\nstatic void ptype##_add(ptype *out, const ptype *p1, const ptype *p2);\t\\\nstatic void ptype##_add_affine(ptype *out, const ptype *p1,\t\t\\\n                                           const ptype##_affine *p2);\t\\\nstatic void ptype##_double(ptype *out, const ptype *p1);\t\t\\\nstatic void ptype##_mult_w5(ptype *out, const ptype *point,\t\t\\\n                            const byte *scalar, size_t nbits);\t\t\\\nstatic void ptype##_cneg(ptype *p, limb_t cbit);\t\t\t\\\nstatic void ptype##_to_affine(ptype##_affine *out, const ptype *in);\t\\\nstatic void ptype##_from_Jacobian(ptype *out, const ptype *in);\t\t\\\n\\\nstatic inline void ptype##_cswap(ptype *restrict a,\t\t\t\\\n                                 ptype *restrict b, bool_t cbit) {\t\\\n    vec_cswap(a, b, sizeof(ptype), cbit);\t\t\t\t\\\n} \\\nstatic inline void ptype##_ccopy(ptype *restrict a,\t\t\t\\\n                                 const ptype *restrict b, bool_t cbit) {\\\n    vec_select(a, b, a, sizeof(ptype), cbit);\t\t\t\t\\\n}\n\n#define DECLARE_PRIVATE_POINTXZ(ptype, bits) \\\ntypedef struct { vec##bits X,Z; } ptype##xz; \\\n\\\nstatic void ptype##xz_ladder_pre(ptype##xz *out, const ptype *in);\t\\\nstatic void ptype##xz_ladder_step(ptype##xz *r, ptype##xz *s,\t\t\\\n                            
      const ptype##xz *p);\t\t\t\\\nstatic void ptype##xz_ladder_post(ptype *ret,\t\t\t\t\\\n                                  const ptype##xz *r, const ptype##xz *s, \\\n                                  const ptype##xz *p, const vec##bits Y1);\\\n\\\nstatic inline void ptype##xz_cswap(ptype##xz *restrict a,\t\t\\\n                                   ptype##xz *restrict b, bool_t cbit) {\\\n    vec_cswap(a, b, sizeof(ptype##xz), cbit);\t\t\t\t\\\n}\n\nDECLARE_POINT(POINTonE1, 384)\n\nDECLARE_POINT(POINTonE2, 384x)\n\n#ifdef __GNUC__\n# pragma GCC diagnostic ignored \"-Wunused-function\"\n#endif\n\n#endif\n"
  },
  {
    "path": "src/rb_tree.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include <stddef.h>\n\n/*\n * Red-black tree tailored for uniqueness test. The number of messages to\n * be checked is known prior to context initialization, the implementation\n * is insert-only, and failure is returned if the message is already in the\n * tree.\n */\n\nstruct node {\n    struct node *leafs[2];\n    const void *data;\n    size_t len_n_colour;    /* len<<1 | colour */\n};\n\nstruct rb_tree {\n    struct node *root;\n    size_t n_nodes;\n    struct node nodes[1];\n};\n\nstatic long bytes_compare(const unsigned char *ptr0, size_t len0,\n                          const unsigned char *ptr1, size_t len1)\n{\n    size_t i, len = len0<len1 ? len0 : len1;\n    long a, b;\n\n    for (i=0; i<len; i++) {\n        if ((a = ptr0[i]) != (b = ptr1[i]))\n            return a - b;\n    }\n\n    return (long)len0 - (long)len1;\n}\n\n#define PAINT_BLACK(p)  ((p)->len_n_colour &= ~(size_t)1)\n#define PAINT_RED(p)    ((p)->len_n_colour |= 1)\n#define IS_RED(p)       ((p)->len_n_colour & 1)\n\nstatic int rb_tree_insert(struct rb_tree *tree, const void *data, size_t len)\n{\n    struct node *nodes[8*sizeof(void *)];   /* visited nodes    */\n    unsigned char dirs[8*sizeof(void *)];   /* taken directions */\n    size_t k = 0;                           /* walked distance  */\n    struct node *p, *y, *z;\n\n    for (p = tree->root; p != NULL; k++) {\n        long cmp = bytes_compare(data, len, p->data, p->len_n_colour>>1);\n\n        if (cmp == 0)\n            return 0;   /* already in tree, no insertion */\n\n        /* record the step */\n        nodes[k] = p;\n        p = p->leafs[(dirs[k] = cmp>0)];\n    }\n\n    /* allocate new node */\n    z = &tree->nodes[tree->n_nodes++];\n    z->leafs[0] = z->leafs[1] = NULL;\n    z->data = data;\n    z->len_n_colour = len<<1;\n    PAINT_RED(z);\n\n    /* graft |z| */\n    if (k > 
0)\n        nodes[k-1]->leafs[dirs[k-1]] = z;\n    else\n        tree->root = z;\n\n    /* re-balance |tree| */\n    while (k >= 2 && IS_RED(y = nodes[k-1])) {\n        size_t ydir = dirs[k-2];\n        struct node *x = nodes[k-2],        /* |z|'s grandparent    */\n                    *s = x->leafs[ydir^1];  /* |z|'s uncle          */\n\n        if (s != NULL && IS_RED(s)) {\n            PAINT_RED(x);\n            PAINT_BLACK(y);\n            PAINT_BLACK(s);\n            k -= 2;\n        } else {\n            if (dirs[k-1] != ydir) {\n                /*    |        |\n                 *    x        x\n                 *   / \\        \\\n                 *  y   s -> z   s\n                 *   \\      /\n                 *    z    y\n                 *   /      \\\n                 *  ?        ?\n                 */\n                struct node *t = y;\n                y = y->leafs[ydir^1];\n                t->leafs[ydir^1] = y->leafs[ydir];\n                y->leafs[ydir] = t;\n            }\n\n            /*      |        |\n             *      x        y\n             *       \\      / \\\n             *    y   s -> z   x\n             *   / \\          / \\\n             *  z   ?        ?   s\n             */\n            x->leafs[ydir] = y->leafs[ydir^1];\n            y->leafs[ydir^1] = x;\n\n            PAINT_RED(x);\n            PAINT_BLACK(y);\n\n            if (k > 2)\n                nodes[k-3]->leafs[dirs[k-3]] = y;\n            else\n                tree->root = y;\n\n            break;\n        }\n    }\n\n    PAINT_BLACK(tree->root);\n\n    return 1;\n}\n\n#undef IS_RED\n#undef PAINT_RED\n#undef PAINT_BLACK\n\nsize_t blst_uniq_sizeof(size_t n_nodes)\n{   return sizeof(struct rb_tree) + sizeof(struct node)*(n_nodes-1);   }\n\nvoid blst_uniq_init(struct rb_tree *tree)\n{\n    tree->root = NULL;\n    tree->n_nodes = 0;\n}\n\nint blst_uniq_test(struct rb_tree *tree, const void *data, size_t len)\n{   return (int)rb_tree_insert(tree, data, len);   }\n"
  },
  {
    "path": "src/recip-addchain.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n/*\n * The \"magic\" number is BLS12_381_P-2. Raising the input base to this\n * power yields its reciprocal.\n *\n * Generated with 'addchain 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559785'\n * https://github.com/kwantam/addchain\n *\n * # Bos-Coster (win=4)           :  461 (16) <<<\n * # Bos-Coster (win=3)           :  464 ( 9)\n * # Bos-Coster (win=8)           :  469 (35)\n * # Bos-Coster (win=5)           :  463 (28)\n * # Bos-Coster (win=9)           :  467 (32)\n * # Bos-Coster (win=7)           :  462 (27)\n * # Yacobi                       :  481 (31)\n * # Bos-Coster (win=10)          :  475 (30)\n * # Bos-Coster (win=6)           :  463 (32)\n * # Bos-Coster (win=2)           :  489 ( 5)\n * # Bergeron-Berstel-Brlek-Duboc :  498 ( 5)\n */\n\n#define RECIPROCAL_MOD_BLS12_381_P(out, inp, ptype) do { \\\nptype t[16]; \\\nvec_copy(t[1], inp, sizeof(ptype)); /*    0: 1 */\\\nsqr(t[0], t[1]);                    /*    1: 2 */\\\nmul(t[9], t[0], t[1]);              /*    2: 3 */\\\nsqr(t[5], t[0]);                    /*    3: 4 */\\\nmul(t[2], t[9], t[0]);              /*    4: 5 */\\\nmul(t[7], t[5], t[9]);              /*    5: 7 */\\\nmul(t[10], t[2], t[5]);             /*    6: 9 */\\\nmul(t[13], t[7], t[5]);             /*    7: b */\\\nmul(t[4], t[10], t[5]);             /*    8: d */\\\nmul(t[8], t[13], t[5]);             /*    9: f */\\\nmul(t[15], t[4], t[5]);             /*   10: 11 */\\\nmul(t[11], t[8], t[5]);             /*   11: 13 */\\\nmul(t[3], t[15], t[5]);             /*   12: 15 */\\\nmul(t[12], t[11], t[5]);            /*   13: 17 */\\\nsqr(t[0], t[4]);                    /*   14: 1a */\\\nmul(t[14], t[12], t[5]);            /*   15: 1b */\\\nmul(t[6], t[0], t[9]);              /*   16: 1d */\\\nmul(t[5], 
t[0], t[2]);              /*   17: 1f */\\\n/* sqr(t[0], t[0]); */              /*   18: 34 */\\\n/* sqr(t[0], t[0]); */              /*   19: 68 */\\\n/* sqr(t[0], t[0]); */              /*   20: d0 */\\\n/* sqr(t[0], t[0]); */              /*   21: 1a0 */\\\n/* sqr(t[0], t[0]); */              /*   22: 340 */\\\n/* sqr(t[0], t[0]); */              /*   23: 680 */\\\n/* sqr(t[0], t[0]); */              /*   24: d00 */\\\n/* sqr(t[0], t[0]); */              /*   25: 1a00 */\\\n/* sqr(t[0], t[0]); */              /*   26: 3400 */\\\n/* sqr(t[0], t[0]); */              /*   27: 6800 */\\\n/* sqr(t[0], t[0]); */              /*   28: d000 */\\\n/* sqr(t[0], t[0]); */              /*   29: 1a000 */\\\nsqr_n_mul(t[0], t[0], 12, t[15]);   /*   30: 1a011 */\\\n/* sqr(t[0], t[0]); */              /*   31: 34022 */\\\n/* sqr(t[0], t[0]); */              /*   32: 68044 */\\\n/* sqr(t[0], t[0]); */              /*   33: d0088 */\\\n/* sqr(t[0], t[0]); */              /*   34: 1a0110 */\\\n/* sqr(t[0], t[0]); */              /*   35: 340220 */\\\n/* sqr(t[0], t[0]); */              /*   36: 680440 */\\\n/* sqr(t[0], t[0]); */              /*   37: d00880 */\\\nsqr_n_mul(t[0], t[0], 7, t[8]);     /*   38: d0088f */\\\n/* sqr(t[0], t[0]); */              /*   39: 1a0111e */\\\n/* sqr(t[0], t[0]); */              /*   40: 340223c */\\\n/* sqr(t[0], t[0]); */              /*   41: 6804478 */\\\n/* sqr(t[0], t[0]); */              /*   42: d0088f0 */\\\nsqr_n_mul(t[0], t[0], 4, t[2]);     /*   43: d0088f5 */\\\n/* sqr(t[0], t[0]); */              /*   44: 1a0111ea */\\\n/* sqr(t[0], t[0]); */              /*   45: 340223d4 */\\\n/* sqr(t[0], t[0]); */              /*   46: 680447a8 */\\\n/* sqr(t[0], t[0]); */              /*   47: d0088f50 */\\\n/* sqr(t[0], t[0]); */              /*   48: 1a0111ea0 */\\\n/* sqr(t[0], t[0]); */              /*   49: 340223d40 */\\\nsqr_n_mul(t[0], t[0], 6, t[7]);     /*   50: 340223d47 */\\\n/* sqr(t[0], t[0]); */              /*   51: 680447a8e 
*/\\\n/* sqr(t[0], t[0]); */              /*   52: d0088f51c */\\\n/* sqr(t[0], t[0]); */              /*   53: 1a0111ea38 */\\\n/* sqr(t[0], t[0]); */              /*   54: 340223d470 */\\\n/* sqr(t[0], t[0]); */              /*   55: 680447a8e0 */\\\n/* sqr(t[0], t[0]); */              /*   56: d0088f51c0 */\\\n/* sqr(t[0], t[0]); */              /*   57: 1a0111ea380 */\\\nsqr_n_mul(t[0], t[0], 7, t[12]);    /*   58: 1a0111ea397 */\\\n/* sqr(t[0], t[0]); */              /*   59: 340223d472e */\\\n/* sqr(t[0], t[0]); */              /*   60: 680447a8e5c */\\\n/* sqr(t[0], t[0]); */              /*   61: d0088f51cb8 */\\\n/* sqr(t[0], t[0]); */              /*   62: 1a0111ea3970 */\\\n/* sqr(t[0], t[0]); */              /*   63: 340223d472e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*   64: 340223d472ff */\\\n/* sqr(t[0], t[0]); */              /*   65: 680447a8e5fe */\\\n/* sqr(t[0], t[0]); */              /*   66: d0088f51cbfc */\\\nsqr_n_mul(t[0], t[0], 2, t[9]);     /*   67: d0088f51cbff */\\\n/* sqr(t[0], t[0]); */              /*   68: 1a0111ea397fe */\\\n/* sqr(t[0], t[0]); */              /*   69: 340223d472ffc */\\\n/* sqr(t[0], t[0]); */              /*   70: 680447a8e5ff8 */\\\n/* sqr(t[0], t[0]); */              /*   71: d0088f51cbff0 */\\\n/* sqr(t[0], t[0]); */              /*   72: 1a0111ea397fe0 */\\\n/* sqr(t[0], t[0]); */              /*   73: 340223d472ffc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[4]);     /*   74: 340223d472ffcd */\\\n/* sqr(t[0], t[0]); */              /*   75: 680447a8e5ff9a */\\\n/* sqr(t[0], t[0]); */              /*   76: d0088f51cbff34 */\\\n/* sqr(t[0], t[0]); */              /*   77: 1a0111ea397fe68 */\\\n/* sqr(t[0], t[0]); */              /*   78: 340223d472ffcd0 */\\\n/* sqr(t[0], t[0]); */              /*   79: 680447a8e5ff9a0 */\\\n/* sqr(t[0], t[0]); */              /*   80: d0088f51cbff340 */\\\nsqr_n_mul(t[0], t[0], 6, t[4]);     /*   81: d0088f51cbff34d */\\\n/* sqr(t[0], t[0]); */              /*   82: 
1a0111ea397fe69a */\\\n/* sqr(t[0], t[0]); */              /*   83: 340223d472ffcd34 */\\\n/* sqr(t[0], t[0]); */              /*   84: 680447a8e5ff9a68 */\\\n/* sqr(t[0], t[0]); */              /*   85: d0088f51cbff34d0 */\\\n/* sqr(t[0], t[0]); */              /*   86: 1a0111ea397fe69a0 */\\\n/* sqr(t[0], t[0]); */              /*   87: 340223d472ffcd340 */\\\nsqr_n_mul(t[0], t[0], 6, t[10]);    /*   88: 340223d472ffcd349 */\\\n/* sqr(t[0], t[0]); */              /*   89: 680447a8e5ff9a692 */\\\n/* sqr(t[0], t[0]); */              /*   90: d0088f51cbff34d24 */\\\n/* sqr(t[0], t[0]); */              /*   91: 1a0111ea397fe69a48 */\\\nsqr_n_mul(t[0], t[0], 3, t[9]);     /*   92: 1a0111ea397fe69a4b */\\\n/* sqr(t[0], t[0]); */              /*   93: 340223d472ffcd3496 */\\\n/* sqr(t[0], t[0]); */              /*   94: 680447a8e5ff9a692c */\\\n/* sqr(t[0], t[0]); */              /*   95: d0088f51cbff34d258 */\\\n/* sqr(t[0], t[0]); */              /*   96: 1a0111ea397fe69a4b0 */\\\n/* sqr(t[0], t[0]); */              /*   97: 340223d472ffcd34960 */\\\n/* sqr(t[0], t[0]); */              /*   98: 680447a8e5ff9a692c0 */\\\n/* sqr(t[0], t[0]); */              /*   99: d0088f51cbff34d2580 */\\\nsqr_n_mul(t[0], t[0], 7, t[4]);     /*  100: d0088f51cbff34d258d */\\\n/* sqr(t[0], t[0]); */              /*  101: 1a0111ea397fe69a4b1a */\\\n/* sqr(t[0], t[0]); */              /*  102: 340223d472ffcd349634 */\\\n/* sqr(t[0], t[0]); */              /*  103: 680447a8e5ff9a692c68 */\\\n/* sqr(t[0], t[0]); */              /*  104: d0088f51cbff34d258d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[4]);     /*  105: d0088f51cbff34d258dd */\\\n/* sqr(t[0], t[0]); */              /*  106: 1a0111ea397fe69a4b1ba */\\\n/* sqr(t[0], t[0]); */              /*  107: 340223d472ffcd3496374 */\\\n/* sqr(t[0], t[0]); */              /*  108: 680447a8e5ff9a692c6e8 */\\\n/* sqr(t[0], t[0]); */              /*  109: d0088f51cbff34d258dd0 */\\\n/* sqr(t[0], t[0]); */              /*  110: 1a0111ea397fe69a4b1ba0 
*/\\\n/* sqr(t[0], t[0]); */              /*  111: 340223d472ffcd34963740 */\\\nsqr_n_mul(t[0], t[0], 6, t[8]);     /*  112: 340223d472ffcd3496374f */\\\n/* sqr(t[0], t[0]); */              /*  113: 680447a8e5ff9a692c6e9e */\\\n/* sqr(t[0], t[0]); */              /*  114: d0088f51cbff34d258dd3c */\\\n/* sqr(t[0], t[0]); */              /*  115: 1a0111ea397fe69a4b1ba78 */\\\n/* sqr(t[0], t[0]); */              /*  116: 340223d472ffcd3496374f0 */\\\n/* sqr(t[0], t[0]); */              /*  117: 680447a8e5ff9a692c6e9e0 */\\\n/* sqr(t[0], t[0]); */              /*  118: d0088f51cbff34d258dd3c0 */\\\nsqr_n_mul(t[0], t[0], 6, t[14]);    /*  119: d0088f51cbff34d258dd3db */\\\n/* sqr(t[0], t[0]); */              /*  120: 1a0111ea397fe69a4b1ba7b6 */\\\n/* sqr(t[0], t[0]); */              /*  121: 340223d472ffcd3496374f6c */\\\n/* sqr(t[0], t[0]); */              /*  122: 680447a8e5ff9a692c6e9ed8 */\\\nsqr_n_mul(t[0], t[0], 3, t[1]);     /*  123: 680447a8e5ff9a692c6e9ed9 */\\\n/* sqr(t[0], t[0]); */              /*  124: d0088f51cbff34d258dd3db2 */\\\n/* sqr(t[0], t[0]); */              /*  125: 1a0111ea397fe69a4b1ba7b64 */\\\n/* sqr(t[0], t[0]); */              /*  126: 340223d472ffcd3496374f6c8 */\\\n/* sqr(t[0], t[0]); */              /*  127: 680447a8e5ff9a692c6e9ed90 */\\\n/* sqr(t[0], t[0]); */              /*  128: d0088f51cbff34d258dd3db20 */\\\n/* sqr(t[0], t[0]); */              /*  129: 1a0111ea397fe69a4b1ba7b640 */\\\n/* sqr(t[0], t[0]); */              /*  130: 340223d472ffcd3496374f6c80 */\\\n/* sqr(t[0], t[0]); */              /*  131: 680447a8e5ff9a692c6e9ed900 */\\\nsqr_n_mul(t[0], t[0], 8, t[4]);     /*  132: 680447a8e5ff9a692c6e9ed90d */\\\n/* sqr(t[0], t[0]); */              /*  133: d0088f51cbff34d258dd3db21a */\\\n/* sqr(t[0], t[0]); */              /*  134: 1a0111ea397fe69a4b1ba7b6434 */\\\n/* sqr(t[0], t[0]); */              /*  135: 340223d472ffcd3496374f6c868 */\\\n/* sqr(t[0], t[0]); */              /*  136: 680447a8e5ff9a692c6e9ed90d0 */\\\n/* 
sqr(t[0], t[0]); */              /*  137: d0088f51cbff34d258dd3db21a0 */\\\n/* sqr(t[0], t[0]); */              /*  138: 1a0111ea397fe69a4b1ba7b64340 */\\\n/* sqr(t[0], t[0]); */              /*  139: 340223d472ffcd3496374f6c8680 */\\\nsqr_n_mul(t[0], t[0], 7, t[12]);    /*  140: 340223d472ffcd3496374f6c8697 */\\\n/* sqr(t[0], t[0]); */              /*  141: 680447a8e5ff9a692c6e9ed90d2e */\\\n/* sqr(t[0], t[0]); */              /*  142: d0088f51cbff34d258dd3db21a5c */\\\n/* sqr(t[0], t[0]); */              /*  143: 1a0111ea397fe69a4b1ba7b6434b8 */\\\n/* sqr(t[0], t[0]); */              /*  144: 340223d472ffcd3496374f6c86970 */\\\n/* sqr(t[0], t[0]); */              /*  145: 680447a8e5ff9a692c6e9ed90d2e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[13]);    /*  146: 680447a8e5ff9a692c6e9ed90d2eb */\\\n/* sqr(t[0], t[0]); */              /*  147: d0088f51cbff34d258dd3db21a5d6 */\\\n/* sqr(t[0], t[0]); */              /*  148: 1a0111ea397fe69a4b1ba7b6434bac */\\\n/* sqr(t[0], t[0]); */              /*  149: 340223d472ffcd3496374f6c869758 */\\\n/* sqr(t[0], t[0]); */              /*  150: 680447a8e5ff9a692c6e9ed90d2eb0 */\\\n/* sqr(t[0], t[0]); */              /*  151: d0088f51cbff34d258dd3db21a5d60 */\\\n/* sqr(t[0], t[0]); */              /*  152: 1a0111ea397fe69a4b1ba7b6434bac0 */\\\nsqr_n_mul(t[0], t[0], 6, t[4]);     /*  153: 1a0111ea397fe69a4b1ba7b6434bacd */\\\n/* sqr(t[0], t[0]); */              /*  154: 340223d472ffcd3496374f6c869759a */\\\n/* sqr(t[0], t[0]); */              /*  155: 680447a8e5ff9a692c6e9ed90d2eb34 */\\\n/* sqr(t[0], t[0]); */              /*  156: d0088f51cbff34d258dd3db21a5d668 */\\\n/* sqr(t[0], t[0]); */              /*  157: 1a0111ea397fe69a4b1ba7b6434bacd0 */\\\n/* sqr(t[0], t[0]); */              /*  158: 340223d472ffcd3496374f6c869759a0 */\\\n/* sqr(t[0], t[0]); */              /*  159: 680447a8e5ff9a692c6e9ed90d2eb340 */\\\nsqr_n_mul(t[0], t[0], 6, t[6]);     /*  160: 680447a8e5ff9a692c6e9ed90d2eb35d */\\\n/* sqr(t[0], t[0]); */              /*  
161: d0088f51cbff34d258dd3db21a5d66ba */\\\n/* sqr(t[0], t[0]); */              /*  162: 1a0111ea397fe69a4b1ba7b6434bacd74 */\\\n/* sqr(t[0], t[0]); */              /*  163: 340223d472ffcd3496374f6c869759ae8 */\\\n/* sqr(t[0], t[0]); */              /*  164: 680447a8e5ff9a692c6e9ed90d2eb35d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[10]);    /*  165: 680447a8e5ff9a692c6e9ed90d2eb35d9 */\\\n/* sqr(t[0], t[0]); */              /*  166: d0088f51cbff34d258dd3db21a5d66bb2 */\\\n/* sqr(t[0], t[0]); */              /*  167: 1a0111ea397fe69a4b1ba7b6434bacd764 */\\\n/* sqr(t[0], t[0]); */              /*  168: 340223d472ffcd3496374f6c869759aec8 */\\\n/* sqr(t[0], t[0]); */              /*  169: 680447a8e5ff9a692c6e9ed90d2eb35d90 */\\\n/* sqr(t[0], t[0]); */              /*  170: d0088f51cbff34d258dd3db21a5d66bb20 */\\\n/* sqr(t[0], t[0]); */              /*  171: 1a0111ea397fe69a4b1ba7b6434bacd7640 */\\\n/* sqr(t[0], t[0]); */              /*  172: 340223d472ffcd3496374f6c869759aec80 */\\\n/* sqr(t[0], t[0]); */              /*  173: 680447a8e5ff9a692c6e9ed90d2eb35d900 */\\\nsqr_n_mul(t[0], t[0], 8, t[6]);     /*  174: 680447a8e5ff9a692c6e9ed90d2eb35d91d */\\\n/* sqr(t[0], t[0]); */              /*  175: d0088f51cbff34d258dd3db21a5d66bb23a */\\\n/* sqr(t[0], t[0]); */              /*  176: 1a0111ea397fe69a4b1ba7b6434bacd76474 */\\\n/* sqr(t[0], t[0]); */              /*  177: 340223d472ffcd3496374f6c869759aec8e8 */\\\n/* sqr(t[0], t[0]); */              /*  178: 680447a8e5ff9a692c6e9ed90d2eb35d91d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[4]);     /*  179: 680447a8e5ff9a692c6e9ed90d2eb35d91dd */\\\n/* sqr(t[0], t[0]); */              /*  180: d0088f51cbff34d258dd3db21a5d66bb23ba */\\\n/* sqr(t[0], t[0]); */              /*  181: 1a0111ea397fe69a4b1ba7b6434bacd764774 */\\\n/* sqr(t[0], t[0]); */              /*  182: 340223d472ffcd3496374f6c869759aec8ee8 */\\\n/* sqr(t[0], t[0]); */              /*  183: 680447a8e5ff9a692c6e9ed90d2eb35d91dd0 */\\\n/* sqr(t[0], t[0]); */              /*  184: 
d0088f51cbff34d258dd3db21a5d66bb23ba0 */\\\n/* sqr(t[0], t[0]); */              /*  185: 1a0111ea397fe69a4b1ba7b6434bacd7647740 */\\\n/* sqr(t[0], t[0]); */              /*  186: 340223d472ffcd3496374f6c869759aec8ee80 */\\\nsqr_n_mul(t[0], t[0], 7, t[12]);    /*  187: 340223d472ffcd3496374f6c869759aec8ee97 */\\\n/* sqr(t[0], t[0]); */              /*  188: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e */\\\n/* sqr(t[0], t[0]); */              /*  189: d0088f51cbff34d258dd3db21a5d66bb23ba5c */\\\n/* sqr(t[0], t[0]); */              /*  190: 1a0111ea397fe69a4b1ba7b6434bacd764774b8 */\\\n/* sqr(t[0], t[0]); */              /*  191: 340223d472ffcd3496374f6c869759aec8ee970 */\\\n/* sqr(t[0], t[0]); */              /*  192: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e0 */\\\n/* sqr(t[0], t[0]); */              /*  193: d0088f51cbff34d258dd3db21a5d66bb23ba5c0 */\\\n/* sqr(t[0], t[0]); */              /*  194: 1a0111ea397fe69a4b1ba7b6434bacd764774b80 */\\\n/* sqr(t[0], t[0]); */              /*  195: 340223d472ffcd3496374f6c869759aec8ee9700 */\\\n/* sqr(t[0], t[0]); */              /*  196: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e00 */\\\nsqr_n_mul(t[0], t[0], 9, t[11]);    /*  197: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13 */\\\n/* sqr(t[0], t[0]); */              /*  198: d0088f51cbff34d258dd3db21a5d66bb23ba5c26 */\\\n/* sqr(t[0], t[0]); */              /*  199: 1a0111ea397fe69a4b1ba7b6434bacd764774b84c */\\\nsqr_n_mul(t[0], t[0], 2, t[9]);     /*  200: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f */\\\n/* sqr(t[0], t[0]); */              /*  201: 340223d472ffcd3496374f6c869759aec8ee9709e */\\\n/* sqr(t[0], t[0]); */              /*  202: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13c */\\\n/* sqr(t[0], t[0]); */              /*  203: d0088f51cbff34d258dd3db21a5d66bb23ba5c278 */\\\n/* sqr(t[0], t[0]); */              /*  204: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f0 */\\\n/* sqr(t[0], t[0]); */              /*  205: 340223d472ffcd3496374f6c869759aec8ee9709e0 */\\\nsqr_n_mul(t[0], t[0], 5, 
t[7]);     /*  206: 340223d472ffcd3496374f6c869759aec8ee9709e7 */\\\n/* sqr(t[0], t[0]); */              /*  207: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce */\\\n/* sqr(t[0], t[0]); */              /*  208: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c */\\\n/* sqr(t[0], t[0]); */              /*  209: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38 */\\\n/* sqr(t[0], t[0]); */              /*  210: 340223d472ffcd3496374f6c869759aec8ee9709e70 */\\\n/* sqr(t[0], t[0]); */              /*  211: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce0 */\\\n/* sqr(t[0], t[0]); */              /*  212: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c0 */\\\n/* sqr(t[0], t[0]); */              /*  213: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f380 */\\\nsqr_n_mul(t[0], t[0], 7, t[2]);     /*  214: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385 */\\\n/* sqr(t[0], t[0]); */              /*  215: 340223d472ffcd3496374f6c869759aec8ee9709e70a */\\\n/* sqr(t[0], t[0]); */              /*  216: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce14 */\\\n/* sqr(t[0], t[0]); */              /*  217: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c28 */\\\n/* sqr(t[0], t[0]); */              /*  218: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f3850 */\\\n/* sqr(t[0], t[0]); */              /*  219: 340223d472ffcd3496374f6c869759aec8ee9709e70a0 */\\\n/* sqr(t[0], t[0]); */              /*  220: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce140 */\\\n/* sqr(t[0], t[0]); */              /*  221: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c280 */\\\nsqr_n_mul(t[0], t[0], 7, t[10]);    /*  222: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c289 */\\\n/* sqr(t[0], t[0]); */              /*  223: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512 */\\\n/* sqr(t[0], t[0]); */              /*  224: 340223d472ffcd3496374f6c869759aec8ee9709e70a24 */\\\n/* sqr(t[0], t[0]); */              /*  225: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce1448 */\\\n/* sqr(t[0], t[0]); */              /*  226: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2890 */\\\n/* 
sqr(t[0], t[0]); */              /*  227: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385120 */\\\n/* sqr(t[0], t[0]); */              /*  228: 340223d472ffcd3496374f6c869759aec8ee9709e70a240 */\\\nsqr_n_mul(t[0], t[0], 6, t[12]);    /*  229: 340223d472ffcd3496374f6c869759aec8ee9709e70a257 */\\\n/* sqr(t[0], t[0]); */              /*  230: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae */\\\n/* sqr(t[0], t[0]); */              /*  231: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895c */\\\n/* sqr(t[0], t[0]); */              /*  232: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512b8 */\\\n/* sqr(t[0], t[0]); */              /*  233: 340223d472ffcd3496374f6c869759aec8ee9709e70a2570 */\\\n/* sqr(t[0], t[0]); */              /*  234: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae0 */\\\nsqr_n_mul(t[0], t[0], 5, t[6]);     /*  235: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd */\\\n/* sqr(t[0], t[0]); */              /*  236: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa */\\\n/* sqr(t[0], t[0]); */              /*  237: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf4 */\\\n/* sqr(t[0], t[0]); */              /*  238: 340223d472ffcd3496374f6c869759aec8ee9709e70a257e8 */\\\n/* sqr(t[0], t[0]); */              /*  239: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd0 */\\\n/* sqr(t[0], t[0]); */              /*  240: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa0 */\\\nsqr_n_mul(t[0], t[0], 5, t[11]);    /*  241: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3 */\\\n/* sqr(t[0], t[0]); */              /*  242: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf66 */\\\n/* sqr(t[0], t[0]); */              /*  243: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ecc */\\\n/* sqr(t[0], t[0]); */              /*  244: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd98 */\\\n/* sqr(t[0], t[0]); */              /*  245: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb30 */\\\n/* sqr(t[0], t[0]); */              /*  246: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf660 */\\\nsqr_n_mul(t[0], t[0], 5, t[11]);    /*  247: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf673 */\\\n/* sqr(t[0], t[0]); */              /*  248: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece6 */\\\n/* sqr(t[0], t[0]); */              /*  249: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc */\\\n/* sqr(t[0], t[0]); */              /*  250: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398 */\\\n/* sqr(t[0], t[0]); */              /*  251: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730 */\\\n/* sqr(t[0], t[0]); */              /*  252: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece60 */\\\n/* sqr(t[0], t[0]); */              /*  253: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc0 */\\\n/* sqr(t[0], t[0]); */              /*  254: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3980 */\\\n/* sqr(t[0], t[0]); */              /*  255: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf67300 */\\\nsqr_n_mul(t[0], t[0], 8, t[4]);     /*  256: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d */\\\n/* sqr(t[0], t[0]); */              /*  257: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a */\\\n/* sqr(t[0], t[0]); */              /*  258: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34 */\\\n/* sqr(t[0], t[0]); */              /*  259: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39868 */\\\n/* sqr(t[0], t[0]); */              /*  260: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d0 */\\\n/* sqr(t[0], t[0]); */              /*  261: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a0 */\\\n/* sqr(t[0], t[0]); */              /*  262: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc340 */\\\n/* sqr(t[0], t[0]); */              /*  263: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398680 */\\\nsqr_n_mul(t[0], t[0], 7, t[3]);     /*  264: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695 */\\\n/* sqr(t[0], t[0]); */              /*  265: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a */\\\n/* sqr(t[0], t[0]); */              /*  266: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a54 */\\\n/* sqr(t[0], t[0]); */              /*  267: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a8 */\\\n/* sqr(t[0], t[0]); */              /*  268: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3986950 */\\\n/* sqr(t[0], t[0]); */              /*  269: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0 */\\\n/* sqr(t[0], t[0]); */              /*  270: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a540 */\\\n/* sqr(t[0], t[0]); */              /*  271: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a80 */\\\n/* sqr(t[0], t[0]); */              /*  272: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869500 */\\\n/* sqr(t[0], t[0]); */              /*  273: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a00 */\\\nsqr_n_mul(t[0], t[0], 9, t[8]);     /*  274: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f */\\\n/* sqr(t[0], t[0]); */              /*  275: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e */\\\n/* sqr(t[0], t[0]); */              /*  276: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83c */\\\n/* sqr(t[0], t[0]); */              /*  277: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695078 */\\\n/* sqr(t[0], t[0]); */              /*  278: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f0 */\\\n/* sqr(t[0], t[0]); */              /*  279: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  280: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed */\\\n/* sqr(t[0], t[0]); */              /*  281: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83da */\\\n/* sqr(t[0], t[0]); */              /*  282: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b4 */\\\n/* sqr(t[0], t[0]); */              /*  283: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f68 */\\\nsqr_n_mul(t[0], t[0], 3, t[9]);     /*  284: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b */\\\n/* sqr(t[0], t[0]); */              /*  285: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed6 */\\\n/* sqr(t[0], t[0]); */              /*  286: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac */\\\n/* sqr(t[0], t[0]); */              /*  287: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b58 */\\\n/* sqr(t[0], t[0]); */              /*  288: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0 */\\\n/* sqr(t[0], t[0]); */              /*  289: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed60 */\\\n/* sqr(t[0], t[0]); */              /*  290: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac0 */\\\n/* sqr(t[0], t[0]); */              /*  291: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b580 */\\\n/* sqr(t[0], t[0]); */              /*  292: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b00 */\\\nsqr_n_mul(t[0], t[0], 8, t[8]);     /*  293: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f */\\\n/* sqr(t[0], t[0]); */              /*  294: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61e */\\\n/* sqr(t[0], t[0]); */              /*  295: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3c */\\\n/* sqr(t[0], t[0]); */              /*  296: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b5878 */\\\nsqr_n_mul(t[0], t[0], 3, t[9]);     /*  297: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b */\\\n/* sqr(t[0], t[0]); */              /*  298: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6 */\\\n/* sqr(t[0], t[0]); */              /*  299: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec */\\\n/* sqr(t[0], t[0]); */              /*  300: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8 */\\\n/* sqr(t[0], t[0]); 
*/              /*  301: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b0 */\\\n/* sqr(t[0], t[0]); */              /*  302: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f60 */\\\n/* sqr(t[0], t[0]); */              /*  303: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec0 */\\\n/* sqr(t[0], t[0]); */              /*  304: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d80 */\\\nsqr_n_mul(t[0], t[0], 7, t[10]);    /*  305: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89 */\\\n/* sqr(t[0], t[0]); */              /*  306: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b12 */\\\n/* sqr(t[0], t[0]); */              /*  307: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624 */\\\n/* sqr(t[0], t[0]); */              /*  308: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec48 */\\\n/* sqr(t[0], t[0]); */              /*  309: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d890 */\\\n/* sqr(t[0], t[0]); */              /*  310: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120 */\\\n/* sqr(t[0], t[0]); */              /*  311: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6240 */\\\n/* sqr(t[0], t[0]); */              /*  312: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec480 */\\\n/* sqr(t[0], t[0]); */              /*  313: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8900 */\\\n/* sqr(t[0], t[0]); */              /*  314: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b1200 */\\\nsqr_n_mul(t[0], t[0], 9, t[8]);     /*  315: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f */\\\n/* sqr(t[0], t[0]); */              /*  316: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e */\\\n/* sqr(t[0], t[0]); */              /*  317: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c */\\\n/* sqr(t[0], t[0]); */              /*  318: 
680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89078 */\\\n/* sqr(t[0], t[0]); */              /*  319: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f0 */\\\n/* sqr(t[0], t[0]); */              /*  320: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e0 */\\\n/* sqr(t[0], t[0]); */              /*  321: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c0 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*  322: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d5 */\\\n/* sqr(t[0], t[0]); */              /*  323: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa */\\\n/* sqr(t[0], t[0]); */              /*  324: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f54 */\\\n/* sqr(t[0], t[0]); */              /*  325: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241ea8 */\\\n/* sqr(t[0], t[0]); */              /*  326: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d50 */\\\n/* sqr(t[0], t[0]); */              /*  327: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa0 */\\\n/* sqr(t[0], t[0]); */              /*  328: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f540 */\\\nsqr_n_mul(t[0], t[0], 6, t[5]);     /*  329: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f */\\\n/* sqr(t[0], t[0]); */              /*  330: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe */\\\n/* sqr(t[0], t[0]); */              /*  331: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57c */\\\n/* sqr(t[0], t[0]); */              /*  332: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaf8 */\\\n/* sqr(t[0], t[0]); */              /*  333: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f0 */\\\n/* sqr(t[0], t[0]); */              /*  334: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe0 
*/\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  335: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff */\\\n/* sqr(t[0], t[0]); */              /*  336: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe */\\\n/* sqr(t[0], t[0]); */              /*  337: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffc */\\\n/* sqr(t[0], t[0]); */              /*  338: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ff8 */\\\n/* sqr(t[0], t[0]); */              /*  339: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff0 */\\\n/* sqr(t[0], t[0]); */              /*  340: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  341: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff */\\\n/* sqr(t[0], t[0]); */              /*  342: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aafffe */\\\n/* sqr(t[0], t[0]); */              /*  343: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55fffc */\\\n/* sqr(t[0], t[0]); */              /*  344: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfff8 */\\\n/* sqr(t[0], t[0]); */              /*  345: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[4]);     /*  346: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd */\\\n/* sqr(t[0], t[0]); */              /*  347: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffa */\\\n/* sqr(t[0], t[0]); */              /*  348: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff4 */\\\n/* sqr(t[0], t[0]); */              /*  349: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffe8 */\\\nsqr_n_mul(t[0], t[0], 3, t[9]);     /*  350: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb */\\\n/* sqr(t[0], t[0]); 
*/              /*  351: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd6 */\\\n/* sqr(t[0], t[0]); */              /*  352: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac */\\\n/* sqr(t[0], t[0]); */              /*  353: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58 */\\\n/* sqr(t[0], t[0]); */              /*  354: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb0 */\\\n/* sqr(t[0], t[0]); */              /*  355: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd60 */\\\n/* sqr(t[0], t[0]); */              /*  356: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac0 */\\\n/* sqr(t[0], t[0]); */              /*  357: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff580 */\\\n/* sqr(t[0], t[0]); */              /*  358: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb00 */\\\nsqr_n_mul(t[0], t[0], 8, t[3]);     /*  359: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb15 */\\\n/* sqr(t[0], t[0]); */              /*  360: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a */\\\n/* sqr(t[0], t[0]); */              /*  361: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54 */\\\n/* sqr(t[0], t[0]); */              /*  362: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a8 */\\\n/* sqr(t[0], t[0]); */              /*  363: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb150 */\\\n/* sqr(t[0], t[0]); */              /*  364: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a0 */\\\n/* sqr(t[0], t[0]); */              /*  365: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac540 */\\\n/* sqr(t[0], t[0]); */              /*  366: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a80 
*/\\\nsqr_n_mul(t[0], t[0], 7, t[5]);     /*  367: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f */\\\n/* sqr(t[0], t[0]); */              /*  368: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e */\\\n/* sqr(t[0], t[0]); */              /*  369: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7c */\\\n/* sqr(t[0], t[0]); */              /*  370: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54f8 */\\\n/* sqr(t[0], t[0]); */              /*  371: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f0 */\\\n/* sqr(t[0], t[0]); */              /*  372: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  373: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff */\\\n/* sqr(t[0], t[0]); */              /*  374: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe */\\\n/* sqr(t[0], t[0]); */              /*  375: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffc */\\\n/* sqr(t[0], t[0]); */              /*  376: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ff8 */\\\n/* sqr(t[0], t[0]); */              /*  377: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff0 */\\\n/* sqr(t[0], t[0]); */              /*  378: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  379: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff */\\\n/* sqr(t[0], t[0]); */              /*  380: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54fffe */\\\n/* sqr(t[0], t[0]); */              /*  381: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9fffc */\\\n/* sqr(t[0], t[0]); */              /*  382: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153fff8 */\\\n/* sqr(t[0], t[0]); */              /*  383: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[8]);     /*  384: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff */\\\n/* sqr(t[0], t[0]); */              /*  385: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffe */\\\n/* sqr(t[0], t[0]); */              /*  386: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffc */\\\n/* sqr(t[0], t[0]); */              /*  387: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffff8 */\\\n/* sqr(t[0], t[0]); */              /*  388: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[7]);     /*  389: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff7 */\\\n/* sqr(t[0], t[0]); */              /*  390: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee */\\\n/* sqr(t[0], t[0]); */              /*  391: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc */\\\n/* sqr(t[0], t[0]); */              /*  392: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb8 */\\\n/* sqr(t[0], t[0]); */              /*  393: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff70 */\\\n/* sqr(t[0], t[0]); */              /*  394: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee0 */\\\n/* sqr(t[0], t[0]); */              /*  395: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc0 */\\\n/* sqr(t[0], t[0]); */              /*  396: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb80 */\\\nsqr_n_mul(t[0], t[0], 7, t[5]);     /*  397: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f */\\\n/* sqr(t[0], t[0]); */              /*  398: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e */\\\n/* sqr(t[0], t[0]); */              /*  399: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7c */\\\n/* sqr(t[0], t[0]); */              /*  400: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcf8 */\\\n/* sqr(t[0], t[0]); */              /*  401: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f0 */\\\n/* sqr(t[0], t[0]); */              /*  402: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[6]);     /*  403: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd */\\\n/* sqr(t[0], t[0]); */              /*  404: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa */\\\n/* sqr(t[0], t[0]); */              /*  405: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff4 */\\\n/* sqr(t[0], t[0]); */              /*  406: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fe8 */\\\n/* sqr(t[0], t[0]); */              /*  407: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd0 */\\\n/* sqr(t[0], t[0]); */              /*  408: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  409: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf */\\\n/* sqr(t[0], t[0]); */              /*  410: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e */\\\n/* sqr(t[0], t[0]); */              /*  411: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefc 
*/\\\n/* sqr(t[0], t[0]); */              /*  412: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdf8 */\\\n/* sqr(t[0], t[0]); */              /*  413: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf0 */\\\n/* sqr(t[0], t[0]); */              /*  414: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  415: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff */\\\n/* sqr(t[0], t[0]); */              /*  416: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe */\\\n/* sqr(t[0], t[0]); */              /*  417: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffc */\\\n/* sqr(t[0], t[0]); */              /*  418: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbff8 */\\\n/* sqr(t[0], t[0]); */              /*  419: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff0 */\\\n/* sqr(t[0], t[0]); */              /*  420: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  421: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff */\\\n/* sqr(t[0], t[0]); */              /*  422: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe */\\\n/* sqr(t[0], t[0]); */              /*  423: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffc */\\\n/* sqr(t[0], t[0]); */              /*  424: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fff8 */\\\n/* sqr(t[0], t[0]); */              /*  425: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff0 */\\\n/* sqr(t[0], t[0]); */              /*  426: 
340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  427: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff */\\\n/* sqr(t[0], t[0]); */              /*  428: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe */\\\n/* sqr(t[0], t[0]); */              /*  429: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ffffc */\\\n/* sqr(t[0], t[0]); */              /*  430: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffff8 */\\\n/* sqr(t[0], t[0]); */              /*  431: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff0 */\\\n/* sqr(t[0], t[0]); */              /*  432: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  433: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff */\\\n/* sqr(t[0], t[0]); */              /*  434: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe */\\\n/* sqr(t[0], t[0]); */              /*  435: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffc */\\\n/* sqr(t[0], t[0]); */              /*  436: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffff8 */\\\n/* sqr(t[0], t[0]); */              /*  437: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff0 */\\\n/* sqr(t[0], t[0]); */              /*  438: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  439: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff */\\\n/* sqr(t[0], t[0]); */              /*  440: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffffffe */\\\n/* sqr(t[0], t[0]); */              /*  441: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffffffc */\\\n/* sqr(t[0], t[0]); */              /*  442: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffff8 */\\\n/* sqr(t[0], t[0]); */              /*  443: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[4]);     /*  444: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd */\\\n/* sqr(t[0], t[0]); */              /*  445: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa */\\\n/* sqr(t[0], t[0]); */              /*  446: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff4 */\\\n/* sqr(t[0], t[0]); */              /*  447: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffe8 */\\\n/* sqr(t[0], t[0]); */              /*  448: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd0 */\\\n/* sqr(t[0], t[0]); */              /*  449: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa0 */\\\n/* sqr(t[0], t[0]); */              /*  450: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff40 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*  451: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff55 */\\\n/* sqr(t[0], t[0]); */              /*  452: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaa */\\\n/* sqr(t[0], t[0]); */              /*  453: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd54 */\\\n/* sqr(t[0], 
t[0]); */              /*  454: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa8 */\\\n/* sqr(t[0], t[0]); */              /*  455: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff550 */\\\nsqr_n_mul(t[0], t[0], 4, t[2]);     /*  456: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff555 */\\\n/* sqr(t[0], t[0]); */              /*  457: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaaa */\\\n/* sqr(t[0], t[0]); */              /*  458: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd554 */\\\n/* sqr(t[0], t[0]); */              /*  459: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa8 */\\\nsqr_n_mul(out, t[0], 3, t[1]);      /*  460: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaa9 */\\\n} while(0)\n"
  },
  {
    "path": "src/recip.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n\n#ifdef __OPTIMIZE_SIZE__\n/*\n * 608 multiplications for scalar inversion modulo BLS12-381 prime, 32%\n * more than corresponding optimal addition-chain, plus mispredicted\n * branch penalties on top of that... The addition chain below was\n * measured to be >50% faster.\n */\nstatic void flt_reciprocal_fp(vec384 out, const vec384 inp)\n{\n    static const byte BLS12_381_P_minus_2[] = {\n        TO_BYTES(0xb9feffffffffaaa9), TO_BYTES(0x1eabfffeb153ffff),\n        TO_BYTES(0x6730d2a0f6b0f624), TO_BYTES(0x64774b84f38512bf),\n        TO_BYTES(0x4b1ba7b6434bacd7), TO_BYTES(0x1a0111ea397fe69a)\n    };\n\n    exp_mont_384(out, inp, BLS12_381_P_minus_2, 381, BLS12_381_P, p0);\n}\n#else\n# define sqr(ret,a)\t\tsqr_fp(ret,a)\n# define mul(ret,a,b)\t\tmul_fp(ret,a,b)\n# define sqr_n_mul(ret,a,n,b)\tsqr_n_mul_fp(ret,a,n,b)\n\n# include \"recip-addchain.h\"\nstatic void flt_reciprocal_fp(vec384 out, const vec384 inp)\n{\n    RECIPROCAL_MOD_BLS12_381_P(out, inp, vec384);\n}\n# undef RECIPROCAL_MOD_BLS12_381_P\n# undef sqr_n_mul\n# undef mul\n# undef sqr\n#endif\n\nstatic void flt_reciprocal_fp2(vec384x out, const vec384x inp)\n{\n    vec384 t0, t1;\n\n    /*\n     * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i\n     */\n    sqr_fp(t0, inp[0]);\n    sqr_fp(t1, inp[1]);\n    add_fp(t0, t0, t1);\n    flt_reciprocal_fp(t1, t0);\n    mul_fp(out[0], inp[0], t1);\n    mul_fp(out[1], inp[1], t1);\n    neg_fp(out[1], out[1]);\n}\n\nstatic void reciprocal_fp(vec384 out, const vec384 inp)\n{\n    static const vec384 Px8 = {    /* left-aligned value of the modulus */\n        TO_LIMB_T(0xcff7fffffffd5558), TO_LIMB_T(0xf55ffff58a9ffffd),\n        TO_LIMB_T(0x39869507b587b120), TO_LIMB_T(0x23ba5c279c2895fb),\n        TO_LIMB_T(0x58dd3db21a5d66bb), TO_LIMB_T(0xd0088f51cbff34d2)\n    };\n    
union { vec768 x; vec384 r[2]; } temp;\n\n    ct_inverse_mod_384(temp.x, inp, BLS12_381_P, Px8);\n    redc_mont_384(temp.r[0], temp.x, BLS12_381_P, p0);\n    mul_mont_384(temp.r[0], temp.r[0], BLS12_381_RR, BLS12_381_P, p0);\n\n#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION\n    /* sign goes straight to flt_reciprocal */\n    mul_mont_384(temp.r[1], temp.r[0], inp, BLS12_381_P, p0);\n    if (vec_is_equal(temp.r[1],  BLS12_381_Rx.p, sizeof(vec384)) |\n        vec_is_zero(temp.r[1], sizeof(vec384)))\n        vec_copy(out, temp.r[0], sizeof(vec384));\n    else\n        flt_reciprocal_fp(out, inp);\n#else\n    vec_copy(out, temp.r[0], sizeof(vec384));\n#endif\n}\n\nvoid blst_fp_inverse(vec384 out, const vec384 inp)\n{   reciprocal_fp(out, inp);   }\n\nvoid blst_fp_eucl_inverse(vec384 ret, const vec384 a)\n{   reciprocal_fp(ret, a);   }\n\nstatic void reciprocal_fp2(vec384x out, const vec384x inp)\n{\n    vec384 t0, t1;\n\n    /*\n     * |out| = 1/(a + b*i) = a/(a^2+b^2) - b/(a^2+b^2)*i\n     */\n    sqr_fp(t0, inp[0]);\n    sqr_fp(t1, inp[1]);\n    add_fp(t0, t0, t1);\n    reciprocal_fp(t1, t0);\n    mul_fp(out[0], inp[0], t1);\n    mul_fp(out[1], inp[1], t1);\n    neg_fp(out[1], out[1]);\n}\n\nvoid blst_fp2_inverse(vec384x out, const vec384x inp)\n{   reciprocal_fp2(out, inp);   }\n\nvoid blst_fp2_eucl_inverse(vec384x out, const vec384x inp)\n{   reciprocal_fp2(out, inp);   }\n\nstatic void reciprocal_fr(vec256 out, const vec256 inp)\n{\n    static const vec256 rx2 = { /* left-aligned value of the modulus */\n        TO_LIMB_T(0xfffffffe00000002), TO_LIMB_T(0xa77b4805fffcb7fd),\n        TO_LIMB_T(0x6673b0101343b00a), TO_LIMB_T(0xe7db4ea6533afa90),\n    };\n    vec512 temp;\n\n    ct_inverse_mod_256(temp, inp, BLS12_381_r, rx2);\n    redc_mont_256(out, temp, BLS12_381_r, r0);\n    mul_mont_sparse_256(out, out, BLS12_381_rRR, BLS12_381_r, r0);\n}\n\nvoid blst_fr_inverse(vec256 out, const vec256 inp)\n{   reciprocal_fr(out, inp);   }\n\nvoid blst_fr_eucl_inverse(vec256 
out, const vec256 inp)\n{   reciprocal_fr(out, inp);   }\n"
  },
  {
    "path": "src/server.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"keygen.c\"\n#include \"hash_to_field.c\"\n#include \"e1.c\"\n#include \"map_to_g1.c\"\n#include \"e2.c\"\n#include \"map_to_g2.c\"\n#include \"fp12_tower.c\"\n#include \"pairing.c\"\n#include \"aggregate.c\"\n#include \"exp.c\"\n#include \"sqrt.c\"\n#include \"recip.c\"\n#include \"bulk_addition.c\"\n#include \"multi_scalar.c\"\n#include \"consts.c\"\n#include \"vect.c\"\n#include \"exports.c\"\n#ifndef __BLST_CGO__\n# include \"rb_tree.c\"\n#endif\n#ifdef BLST_FR_PENTAROOT\n# include \"pentaroot.c\"\n#endif\n#ifndef __BLST_NO_CPUID__\n# include \"cpuid.c\"\n#endif\n"
  },
  {
    "path": "src/sha256.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_SHA256_H__\n#define __BLS12_381_ASM_SHA256_H__\n\n#include \"vect.h\"\n\n#if (defined(__x86_64__) || defined(__x86_64) || defined(_M_X64)) && \\\n     defined(__SHA__) /* -msha */ && !defined(__BLST_PORTABLE__)\n# define sha256_block_data_order blst_sha256_block_data_order_shaext\n#elif defined(__aarch64__) && \\\n      defined(__ARM_FEATURE_CRYPTO) && !defined(__BLST_PORTABLE__)\n# define sha256_block_data_order blst_sha256_block_armv8\n#else\n# define sha256_block_data_order blst_sha256_block_data_order\n#endif\n#define sha256_hcopy blst_sha256_hcopy\n#define sha256_bcopy blst_sha256_bcopy\n#define sha256_emit  blst_sha256_emit\n\nvoid sha256_block_data_order(unsigned int *h, const void *inp, size_t blocks);\nvoid sha256_hcopy(unsigned int dst[8], const unsigned int src[8]);\nvoid sha256_bcopy(void *dst, const void *src, size_t len);\n\n/*\n * If SHA256_CTX conflicts with something, just redefine it to alternative\n * custom name prior including this header.\n */\ntypedef struct {\n    unsigned int h[8];\n    unsigned long long N;\n    unsigned char buf[64];\n    size_t off;\n} SHA256_CTX;\n\n\nstatic void sha256_init_h(unsigned int h[8])\n{\n    h[0] = 0x6a09e667U;\n    h[1] = 0xbb67ae85U;\n    h[2] = 0x3c6ef372U;\n    h[3] = 0xa54ff53aU;\n    h[4] = 0x510e527fU;\n    h[5] = 0x9b05688cU;\n    h[6] = 0x1f83d9abU;\n    h[7] = 0x5be0cd19U;\n}\n\nstatic void sha256_init(SHA256_CTX *ctx)\n{\n    sha256_init_h(ctx->h);\n    ctx->N = 0;\n    vec_zero(ctx->buf, sizeof(ctx->buf));\n    ctx->off = 0;\n}\n\nstatic void sha256_update(SHA256_CTX *ctx, const void *_inp, size_t len)\n{\n    size_t n;\n    const unsigned char *inp = _inp;\n\n    ctx->N += len;\n\n    if ((len != 0) & ((n = ctx->off) != 0)) {\n        size_t rem = sizeof(ctx->buf) - n;\n\n        if (rem > len) {\n    
        sha256_bcopy(ctx->buf + n, inp, len);\n            ctx->off += len;\n            return;\n        } else {\n            sha256_bcopy(ctx->buf + n, inp, rem);\n            inp += rem;\n            len -= rem;\n            sha256_block_data_order(ctx->h, ctx->buf, 1);\n            vec_zero(ctx->buf, sizeof(ctx->buf));\n            ctx->off = 0;\n        }\n    }\n\n    n = len / sizeof(ctx->buf);\n    if (n > 0) {\n        sha256_block_data_order(ctx->h, inp, n);\n        n *= sizeof(ctx->buf);\n        inp += n;\n        len -= n;\n    }\n\n    if (len)\n        sha256_bcopy(ctx->buf, inp, ctx->off = len);\n}\n\n#define __TOBE32(ptr, val) ((ptr)[0] = (unsigned char)((val)>>24), \\\n                            (ptr)[1] = (unsigned char)((val)>>16), \\\n                            (ptr)[2] = (unsigned char)((val)>>8),  \\\n                            (ptr)[3] = (unsigned char)(val))\n\n#if 1\nvoid sha256_emit(unsigned char md[32], const unsigned int h[8]);\n#else\nstatic void sha256_emit(unsigned char md[32], const unsigned int h[8])\n{\n    unsigned int h_i;\n\n    h_i = h[0]; __TOBE32(md + 0, h_i);\n    h_i = h[1]; __TOBE32(md + 4, h_i);\n    h_i = h[2]; __TOBE32(md + 8, h_i);\n    h_i = h[3]; __TOBE32(md + 12, h_i);\n    h_i = h[4]; __TOBE32(md + 16, h_i);\n    h_i = h[5]; __TOBE32(md + 20, h_i);\n    h_i = h[6]; __TOBE32(md + 24, h_i);\n    h_i = h[7]; __TOBE32(md + 28, h_i);\n}\n#endif\n\nstatic void sha256_final(unsigned char md[32], SHA256_CTX *ctx)\n{\n    unsigned long long bits = ctx->N * 8;\n    size_t n = ctx->off;\n    unsigned char *tail;\n\n    ctx->buf[n++] = 0x80;\n\n    if (n > (sizeof(ctx->buf) - 8)) {\n        sha256_block_data_order(ctx->h, ctx->buf, 1);\n        vec_zero(ctx->buf, sizeof(ctx->buf));\n    }\n\n    tail = ctx->buf + sizeof(ctx->buf) - 8;\n    __TOBE32(tail, (unsigned int)(bits >> 32));\n    __TOBE32(tail + 4, (unsigned int)bits);\n    sha256_block_data_order(ctx->h, ctx->buf, 1);\n    sha256_emit(md, ctx->h);\n}\n\n#undef 
__TOBE32\n#endif\n"
  },
  {
    "path": "src/sqrt-addchain.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n/*\n * The \"magic\" number is (BLS12_381_P-3)/4. Exponentiation to which\n * yields reciprocal of sqrt(x), which is used in simplified Shallue-\n * van de Woestijne-Ulas map-to-curve method, but it's trivial to adapt\n * it for more \"traditional\" sqrt(x) as 'x*ret' (or for is_square(x)\n * as 'x*ret^2==1').\n *\n * Generated with 'addchain 1000602388805416848354447456433976039139220704984751971333014534031007912622709466110671907282253916009473568139946'\n * https://github.com/kwantam/addchain\n *\n * # Bos-Coster (win=4)           :  458 (16) <<<\n * # Bos-Coster (win=5)           :  460 (28)\n * # Bos-Coster (win=6)           :  461 (33)\n * # Bos-Coster (win=7)           :  460 (28)\n * # Bos-Coster (win=3)           :  462 ( 9)\n * # Bos-Coster (win=8)           :  466 (34)\n * # Bos-Coster (win=9)           :  464 (31)\n * # Yacobi                       :  478 (31)\n * # Bos-Coster (win=10)          :  473 (30)\n * # Bos-Coster (win=2)           :  486 ( 5)\n * # Bergeron-Berstel-Brlek-Duboc :  489 ( 5)\n */\n\n#define RECIP_SQRT_MOD_BLS12_381_P(out, inp, ptype) do { \\\nptype t[16]; \\\nvec_copy(t[13], inp, sizeof(ptype));/*    0: 1 */\\\nsqr(t[0], t[13]);                   /*    1: 2 */\\\nmul(t[8], t[0], t[13]);             /*    2: 3 */\\\nsqr(t[4], t[0]);                    /*    3: 4 */\\\nmul(t[1], t[8], t[0]);              /*    4: 5 */\\\nmul(t[6], t[4], t[8]);              /*    5: 7 */\\\nmul(t[9], t[1], t[4]);              /*    6: 9 */\\\nmul(t[12], t[6], t[4]);             /*    7: b */\\\nmul(t[3], t[9], t[4]);              /*    8: d */\\\nmul(t[7], t[12], t[4]);             /*    9: f */\\\nmul(t[15], t[3], t[4]);             /*   10: 11 */\\\nmul(t[10], t[7], t[4]);             /*   11: 13 */\\\nmul(t[2], t[15], t[4]);             /*   12: 15 */\\\nmul(t[11], t[10], 
t[4]);            /*   13: 17 */\\\nsqr(t[0], t[3]);                    /*   14: 1a */\\\nmul(t[14], t[11], t[4]);            /*   15: 1b */\\\nmul(t[5], t[0], t[8]);              /*   16: 1d */\\\nmul(t[4], t[0], t[1]);              /*   17: 1f */\\\n/* sqr(t[0], t[0]); */              /*   18: 34 */\\\n/* sqr(t[0], t[0]); */              /*   19: 68 */\\\n/* sqr(t[0], t[0]); */              /*   20: d0 */\\\n/* sqr(t[0], t[0]); */              /*   21: 1a0 */\\\n/* sqr(t[0], t[0]); */              /*   22: 340 */\\\n/* sqr(t[0], t[0]); */              /*   23: 680 */\\\n/* sqr(t[0], t[0]); */              /*   24: d00 */\\\n/* sqr(t[0], t[0]); */              /*   25: 1a00 */\\\n/* sqr(t[0], t[0]); */              /*   26: 3400 */\\\n/* sqr(t[0], t[0]); */              /*   27: 6800 */\\\n/* sqr(t[0], t[0]); */              /*   28: d000 */\\\n/* sqr(t[0], t[0]); */              /*   29: 1a000 */\\\nsqr_n_mul(t[0], t[0], 12, t[15]);   /*   30: 1a011 */\\\n/* sqr(t[0], t[0]); */              /*   31: 34022 */\\\n/* sqr(t[0], t[0]); */              /*   32: 68044 */\\\n/* sqr(t[0], t[0]); */              /*   33: d0088 */\\\n/* sqr(t[0], t[0]); */              /*   34: 1a0110 */\\\n/* sqr(t[0], t[0]); */              /*   35: 340220 */\\\n/* sqr(t[0], t[0]); */              /*   36: 680440 */\\\n/* sqr(t[0], t[0]); */              /*   37: d00880 */\\\nsqr_n_mul(t[0], t[0], 7, t[7]);     /*   38: d0088f */\\\n/* sqr(t[0], t[0]); */              /*   39: 1a0111e */\\\n/* sqr(t[0], t[0]); */              /*   40: 340223c */\\\n/* sqr(t[0], t[0]); */              /*   41: 6804478 */\\\n/* sqr(t[0], t[0]); */              /*   42: d0088f0 */\\\nsqr_n_mul(t[0], t[0], 4, t[1]);     /*   43: d0088f5 */\\\n/* sqr(t[0], t[0]); */              /*   44: 1a0111ea */\\\n/* sqr(t[0], t[0]); */              /*   45: 340223d4 */\\\n/* sqr(t[0], t[0]); */              /*   46: 680447a8 */\\\n/* sqr(t[0], t[0]); */              /*   47: d0088f50 */\\\n/* sqr(t[0], t[0]); */          
    /*   48: 1a0111ea0 */\\\n/* sqr(t[0], t[0]); */              /*   49: 340223d40 */\\\nsqr_n_mul(t[0], t[0], 6, t[6]);     /*   50: 340223d47 */\\\n/* sqr(t[0], t[0]); */              /*   51: 680447a8e */\\\n/* sqr(t[0], t[0]); */              /*   52: d0088f51c */\\\n/* sqr(t[0], t[0]); */              /*   53: 1a0111ea38 */\\\n/* sqr(t[0], t[0]); */              /*   54: 340223d470 */\\\n/* sqr(t[0], t[0]); */              /*   55: 680447a8e0 */\\\n/* sqr(t[0], t[0]); */              /*   56: d0088f51c0 */\\\n/* sqr(t[0], t[0]); */              /*   57: 1a0111ea380 */\\\nsqr_n_mul(t[0], t[0], 7, t[11]);    /*   58: 1a0111ea397 */\\\n/* sqr(t[0], t[0]); */              /*   59: 340223d472e */\\\n/* sqr(t[0], t[0]); */              /*   60: 680447a8e5c */\\\n/* sqr(t[0], t[0]); */              /*   61: d0088f51cb8 */\\\n/* sqr(t[0], t[0]); */              /*   62: 1a0111ea3970 */\\\n/* sqr(t[0], t[0]); */              /*   63: 340223d472e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*   64: 340223d472ff */\\\n/* sqr(t[0], t[0]); */              /*   65: 680447a8e5fe */\\\n/* sqr(t[0], t[0]); */              /*   66: d0088f51cbfc */\\\nsqr_n_mul(t[0], t[0], 2, t[8]);     /*   67: d0088f51cbff */\\\n/* sqr(t[0], t[0]); */              /*   68: 1a0111ea397fe */\\\n/* sqr(t[0], t[0]); */              /*   69: 340223d472ffc */\\\n/* sqr(t[0], t[0]); */              /*   70: 680447a8e5ff8 */\\\n/* sqr(t[0], t[0]); */              /*   71: d0088f51cbff0 */\\\n/* sqr(t[0], t[0]); */              /*   72: 1a0111ea397fe0 */\\\n/* sqr(t[0], t[0]); */              /*   73: 340223d472ffc0 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*   74: 340223d472ffcd */\\\n/* sqr(t[0], t[0]); */              /*   75: 680447a8e5ff9a */\\\n/* sqr(t[0], t[0]); */              /*   76: d0088f51cbff34 */\\\n/* sqr(t[0], t[0]); */              /*   77: 1a0111ea397fe68 */\\\n/* sqr(t[0], t[0]); */              /*   78: 340223d472ffcd0 */\\\n/* sqr(t[0], t[0]); */              /*   79: 
680447a8e5ff9a0 */\\\n/* sqr(t[0], t[0]); */              /*   80: d0088f51cbff340 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*   81: d0088f51cbff34d */\\\n/* sqr(t[0], t[0]); */              /*   82: 1a0111ea397fe69a */\\\n/* sqr(t[0], t[0]); */              /*   83: 340223d472ffcd34 */\\\n/* sqr(t[0], t[0]); */              /*   84: 680447a8e5ff9a68 */\\\n/* sqr(t[0], t[0]); */              /*   85: d0088f51cbff34d0 */\\\n/* sqr(t[0], t[0]); */              /*   86: 1a0111ea397fe69a0 */\\\n/* sqr(t[0], t[0]); */              /*   87: 340223d472ffcd340 */\\\nsqr_n_mul(t[0], t[0], 6, t[9]);     /*   88: 340223d472ffcd349 */\\\n/* sqr(t[0], t[0]); */              /*   89: 680447a8e5ff9a692 */\\\n/* sqr(t[0], t[0]); */              /*   90: d0088f51cbff34d24 */\\\n/* sqr(t[0], t[0]); */              /*   91: 1a0111ea397fe69a48 */\\\nsqr_n_mul(t[0], t[0], 3, t[8]);     /*   92: 1a0111ea397fe69a4b */\\\n/* sqr(t[0], t[0]); */              /*   93: 340223d472ffcd3496 */\\\n/* sqr(t[0], t[0]); */              /*   94: 680447a8e5ff9a692c */\\\n/* sqr(t[0], t[0]); */              /*   95: d0088f51cbff34d258 */\\\n/* sqr(t[0], t[0]); */              /*   96: 1a0111ea397fe69a4b0 */\\\n/* sqr(t[0], t[0]); */              /*   97: 340223d472ffcd34960 */\\\n/* sqr(t[0], t[0]); */              /*   98: 680447a8e5ff9a692c0 */\\\n/* sqr(t[0], t[0]); */              /*   99: d0088f51cbff34d2580 */\\\nsqr_n_mul(t[0], t[0], 7, t[3]);     /*  100: d0088f51cbff34d258d */\\\n/* sqr(t[0], t[0]); */              /*  101: 1a0111ea397fe69a4b1a */\\\n/* sqr(t[0], t[0]); */              /*  102: 340223d472ffcd349634 */\\\n/* sqr(t[0], t[0]); */              /*  103: 680447a8e5ff9a692c68 */\\\n/* sqr(t[0], t[0]); */              /*  104: d0088f51cbff34d258d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[3]);     /*  105: d0088f51cbff34d258dd */\\\n/* sqr(t[0], t[0]); */              /*  106: 1a0111ea397fe69a4b1ba */\\\n/* sqr(t[0], t[0]); */              /*  107: 340223d472ffcd3496374 */\\\n/* sqr(t[0], 
t[0]); */              /*  108: 680447a8e5ff9a692c6e8 */\\\n/* sqr(t[0], t[0]); */              /*  109: d0088f51cbff34d258dd0 */\\\n/* sqr(t[0], t[0]); */              /*  110: 1a0111ea397fe69a4b1ba0 */\\\n/* sqr(t[0], t[0]); */              /*  111: 340223d472ffcd34963740 */\\\nsqr_n_mul(t[0], t[0], 6, t[7]);     /*  112: 340223d472ffcd3496374f */\\\n/* sqr(t[0], t[0]); */              /*  113: 680447a8e5ff9a692c6e9e */\\\n/* sqr(t[0], t[0]); */              /*  114: d0088f51cbff34d258dd3c */\\\n/* sqr(t[0], t[0]); */              /*  115: 1a0111ea397fe69a4b1ba78 */\\\n/* sqr(t[0], t[0]); */              /*  116: 340223d472ffcd3496374f0 */\\\n/* sqr(t[0], t[0]); */              /*  117: 680447a8e5ff9a692c6e9e0 */\\\n/* sqr(t[0], t[0]); */              /*  118: d0088f51cbff34d258dd3c0 */\\\nsqr_n_mul(t[0], t[0], 6, t[14]);    /*  119: d0088f51cbff34d258dd3db */\\\n/* sqr(t[0], t[0]); */              /*  120: 1a0111ea397fe69a4b1ba7b6 */\\\n/* sqr(t[0], t[0]); */              /*  121: 340223d472ffcd3496374f6c */\\\n/* sqr(t[0], t[0]); */              /*  122: 680447a8e5ff9a692c6e9ed8 */\\\nsqr_n_mul(t[0], t[0], 3, t[13]);    /*  123: 680447a8e5ff9a692c6e9ed9 */\\\n/* sqr(t[0], t[0]); */              /*  124: d0088f51cbff34d258dd3db2 */\\\n/* sqr(t[0], t[0]); */              /*  125: 1a0111ea397fe69a4b1ba7b64 */\\\n/* sqr(t[0], t[0]); */              /*  126: 340223d472ffcd3496374f6c8 */\\\n/* sqr(t[0], t[0]); */              /*  127: 680447a8e5ff9a692c6e9ed90 */\\\n/* sqr(t[0], t[0]); */              /*  128: d0088f51cbff34d258dd3db20 */\\\n/* sqr(t[0], t[0]); */              /*  129: 1a0111ea397fe69a4b1ba7b640 */\\\n/* sqr(t[0], t[0]); */              /*  130: 340223d472ffcd3496374f6c80 */\\\n/* sqr(t[0], t[0]); */              /*  131: 680447a8e5ff9a692c6e9ed900 */\\\nsqr_n_mul(t[0], t[0], 8, t[3]);     /*  132: 680447a8e5ff9a692c6e9ed90d */\\\n/* sqr(t[0], t[0]); */              /*  133: d0088f51cbff34d258dd3db21a */\\\n/* sqr(t[0], t[0]); */              /*  
134: 1a0111ea397fe69a4b1ba7b6434 */\\\n/* sqr(t[0], t[0]); */              /*  135: 340223d472ffcd3496374f6c868 */\\\n/* sqr(t[0], t[0]); */              /*  136: 680447a8e5ff9a692c6e9ed90d0 */\\\n/* sqr(t[0], t[0]); */              /*  137: d0088f51cbff34d258dd3db21a0 */\\\n/* sqr(t[0], t[0]); */              /*  138: 1a0111ea397fe69a4b1ba7b64340 */\\\n/* sqr(t[0], t[0]); */              /*  139: 340223d472ffcd3496374f6c8680 */\\\nsqr_n_mul(t[0], t[0], 7, t[11]);    /*  140: 340223d472ffcd3496374f6c8697 */\\\n/* sqr(t[0], t[0]); */              /*  141: 680447a8e5ff9a692c6e9ed90d2e */\\\n/* sqr(t[0], t[0]); */              /*  142: d0088f51cbff34d258dd3db21a5c */\\\n/* sqr(t[0], t[0]); */              /*  143: 1a0111ea397fe69a4b1ba7b6434b8 */\\\n/* sqr(t[0], t[0]); */              /*  144: 340223d472ffcd3496374f6c86970 */\\\n/* sqr(t[0], t[0]); */              /*  145: 680447a8e5ff9a692c6e9ed90d2e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[12]);    /*  146: 680447a8e5ff9a692c6e9ed90d2eb */\\\n/* sqr(t[0], t[0]); */              /*  147: d0088f51cbff34d258dd3db21a5d6 */\\\n/* sqr(t[0], t[0]); */              /*  148: 1a0111ea397fe69a4b1ba7b6434bac */\\\n/* sqr(t[0], t[0]); */              /*  149: 340223d472ffcd3496374f6c869758 */\\\n/* sqr(t[0], t[0]); */              /*  150: 680447a8e5ff9a692c6e9ed90d2eb0 */\\\n/* sqr(t[0], t[0]); */              /*  151: d0088f51cbff34d258dd3db21a5d60 */\\\n/* sqr(t[0], t[0]); */              /*  152: 1a0111ea397fe69a4b1ba7b6434bac0 */\\\nsqr_n_mul(t[0], t[0], 6, t[3]);     /*  153: 1a0111ea397fe69a4b1ba7b6434bacd */\\\n/* sqr(t[0], t[0]); */              /*  154: 340223d472ffcd3496374f6c869759a */\\\n/* sqr(t[0], t[0]); */              /*  155: 680447a8e5ff9a692c6e9ed90d2eb34 */\\\n/* sqr(t[0], t[0]); */              /*  156: d0088f51cbff34d258dd3db21a5d668 */\\\n/* sqr(t[0], t[0]); */              /*  157: 1a0111ea397fe69a4b1ba7b6434bacd0 */\\\n/* sqr(t[0], t[0]); */              /*  158: 340223d472ffcd3496374f6c869759a0 */\\\n/* 
sqr(t[0], t[0]); */              /*  159: 680447a8e5ff9a692c6e9ed90d2eb340 */\\\nsqr_n_mul(t[0], t[0], 6, t[5]);     /*  160: 680447a8e5ff9a692c6e9ed90d2eb35d */\\\n/* sqr(t[0], t[0]); */              /*  161: d0088f51cbff34d258dd3db21a5d66ba */\\\n/* sqr(t[0], t[0]); */              /*  162: 1a0111ea397fe69a4b1ba7b6434bacd74 */\\\n/* sqr(t[0], t[0]); */              /*  163: 340223d472ffcd3496374f6c869759ae8 */\\\n/* sqr(t[0], t[0]); */              /*  164: 680447a8e5ff9a692c6e9ed90d2eb35d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[9]);     /*  165: 680447a8e5ff9a692c6e9ed90d2eb35d9 */\\\n/* sqr(t[0], t[0]); */              /*  166: d0088f51cbff34d258dd3db21a5d66bb2 */\\\n/* sqr(t[0], t[0]); */              /*  167: 1a0111ea397fe69a4b1ba7b6434bacd764 */\\\n/* sqr(t[0], t[0]); */              /*  168: 340223d472ffcd3496374f6c869759aec8 */\\\n/* sqr(t[0], t[0]); */              /*  169: 680447a8e5ff9a692c6e9ed90d2eb35d90 */\\\n/* sqr(t[0], t[0]); */              /*  170: d0088f51cbff34d258dd3db21a5d66bb20 */\\\n/* sqr(t[0], t[0]); */              /*  171: 1a0111ea397fe69a4b1ba7b6434bacd7640 */\\\n/* sqr(t[0], t[0]); */              /*  172: 340223d472ffcd3496374f6c869759aec80 */\\\n/* sqr(t[0], t[0]); */              /*  173: 680447a8e5ff9a692c6e9ed90d2eb35d900 */\\\nsqr_n_mul(t[0], t[0], 8, t[5]);     /*  174: 680447a8e5ff9a692c6e9ed90d2eb35d91d */\\\n/* sqr(t[0], t[0]); */              /*  175: d0088f51cbff34d258dd3db21a5d66bb23a */\\\n/* sqr(t[0], t[0]); */              /*  176: 1a0111ea397fe69a4b1ba7b6434bacd76474 */\\\n/* sqr(t[0], t[0]); */              /*  177: 340223d472ffcd3496374f6c869759aec8e8 */\\\n/* sqr(t[0], t[0]); */              /*  178: 680447a8e5ff9a692c6e9ed90d2eb35d91d0 */\\\nsqr_n_mul(t[0], t[0], 4, t[3]);     /*  179: 680447a8e5ff9a692c6e9ed90d2eb35d91dd */\\\n/* sqr(t[0], t[0]); */              /*  180: d0088f51cbff34d258dd3db21a5d66bb23ba */\\\n/* sqr(t[0], t[0]); */              /*  181: 1a0111ea397fe69a4b1ba7b6434bacd764774 */\\\n/* sqr(t[0], 
t[0]); */              /*  182: 340223d472ffcd3496374f6c869759aec8ee8 */\\\n/* sqr(t[0], t[0]); */              /*  183: 680447a8e5ff9a692c6e9ed90d2eb35d91dd0 */\\\n/* sqr(t[0], t[0]); */              /*  184: d0088f51cbff34d258dd3db21a5d66bb23ba0 */\\\n/* sqr(t[0], t[0]); */              /*  185: 1a0111ea397fe69a4b1ba7b6434bacd7647740 */\\\n/* sqr(t[0], t[0]); */              /*  186: 340223d472ffcd3496374f6c869759aec8ee80 */\\\nsqr_n_mul(t[0], t[0], 7, t[11]);    /*  187: 340223d472ffcd3496374f6c869759aec8ee97 */\\\n/* sqr(t[0], t[0]); */              /*  188: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e */\\\n/* sqr(t[0], t[0]); */              /*  189: d0088f51cbff34d258dd3db21a5d66bb23ba5c */\\\n/* sqr(t[0], t[0]); */              /*  190: 1a0111ea397fe69a4b1ba7b6434bacd764774b8 */\\\n/* sqr(t[0], t[0]); */              /*  191: 340223d472ffcd3496374f6c869759aec8ee970 */\\\n/* sqr(t[0], t[0]); */              /*  192: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e0 */\\\n/* sqr(t[0], t[0]); */              /*  193: d0088f51cbff34d258dd3db21a5d66bb23ba5c0 */\\\n/* sqr(t[0], t[0]); */              /*  194: 1a0111ea397fe69a4b1ba7b6434bacd764774b80 */\\\n/* sqr(t[0], t[0]); */              /*  195: 340223d472ffcd3496374f6c869759aec8ee9700 */\\\n/* sqr(t[0], t[0]); */              /*  196: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e00 */\\\nsqr_n_mul(t[0], t[0], 9, t[10]);    /*  197: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13 */\\\n/* sqr(t[0], t[0]); */              /*  198: d0088f51cbff34d258dd3db21a5d66bb23ba5c26 */\\\n/* sqr(t[0], t[0]); */              /*  199: 1a0111ea397fe69a4b1ba7b6434bacd764774b84c */\\\nsqr_n_mul(t[0], t[0], 2, t[8]);     /*  200: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f */\\\n/* sqr(t[0], t[0]); */              /*  201: 340223d472ffcd3496374f6c869759aec8ee9709e */\\\n/* sqr(t[0], t[0]); */              /*  202: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13c */\\\n/* sqr(t[0], t[0]); */              /*  203: d0088f51cbff34d258dd3db21a5d66bb23ba5c278 */\\\n/* 
sqr(t[0], t[0]); */              /*  204: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f0 */\\\n/* sqr(t[0], t[0]); */              /*  205: 340223d472ffcd3496374f6c869759aec8ee9709e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[6]);     /*  206: 340223d472ffcd3496374f6c869759aec8ee9709e7 */\\\n/* sqr(t[0], t[0]); */              /*  207: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce */\\\n/* sqr(t[0], t[0]); */              /*  208: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c */\\\n/* sqr(t[0], t[0]); */              /*  209: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38 */\\\n/* sqr(t[0], t[0]); */              /*  210: 340223d472ffcd3496374f6c869759aec8ee9709e70 */\\\n/* sqr(t[0], t[0]); */              /*  211: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce0 */\\\n/* sqr(t[0], t[0]); */              /*  212: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c0 */\\\n/* sqr(t[0], t[0]); */              /*  213: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f380 */\\\nsqr_n_mul(t[0], t[0], 7, t[1]);     /*  214: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385 */\\\n/* sqr(t[0], t[0]); */              /*  215: 340223d472ffcd3496374f6c869759aec8ee9709e70a */\\\n/* sqr(t[0], t[0]); */              /*  216: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce14 */\\\n/* sqr(t[0], t[0]); */              /*  217: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c28 */\\\n/* sqr(t[0], t[0]); */              /*  218: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f3850 */\\\n/* sqr(t[0], t[0]); */              /*  219: 340223d472ffcd3496374f6c869759aec8ee9709e70a0 */\\\n/* sqr(t[0], t[0]); */              /*  220: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce140 */\\\n/* sqr(t[0], t[0]); */              /*  221: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c280 */\\\nsqr_n_mul(t[0], t[0], 7, t[9]);     /*  222: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c289 */\\\n/* sqr(t[0], t[0]); */              /*  223: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512 */\\\n/* sqr(t[0], t[0]); */              /*  224: 
340223d472ffcd3496374f6c869759aec8ee9709e70a24 */\\\n/* sqr(t[0], t[0]); */              /*  225: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce1448 */\\\n/* sqr(t[0], t[0]); */              /*  226: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2890 */\\\n/* sqr(t[0], t[0]); */              /*  227: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f385120 */\\\n/* sqr(t[0], t[0]); */              /*  228: 340223d472ffcd3496374f6c869759aec8ee9709e70a240 */\\\nsqr_n_mul(t[0], t[0], 6, t[11]);    /*  229: 340223d472ffcd3496374f6c869759aec8ee9709e70a257 */\\\n/* sqr(t[0], t[0]); */              /*  230: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae */\\\n/* sqr(t[0], t[0]); */              /*  231: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895c */\\\n/* sqr(t[0], t[0]); */              /*  232: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512b8 */\\\n/* sqr(t[0], t[0]); */              /*  233: 340223d472ffcd3496374f6c869759aec8ee9709e70a2570 */\\\n/* sqr(t[0], t[0]); */              /*  234: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144ae0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  235: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd */\\\n/* sqr(t[0], t[0]); */              /*  236: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa */\\\n/* sqr(t[0], t[0]); */              /*  237: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf4 */\\\n/* sqr(t[0], t[0]); */              /*  238: 340223d472ffcd3496374f6c869759aec8ee9709e70a257e8 */\\\n/* sqr(t[0], t[0]); */              /*  239: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd0 */\\\n/* sqr(t[0], t[0]); */              /*  240: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fa0 */\\\nsqr_n_mul(t[0], t[0], 5, t[10]);    /*  241: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3 */\\\n/* sqr(t[0], t[0]); */              /*  242: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf66 */\\\n/* sqr(t[0], t[0]); */              /*  243: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ecc */\\\n/* sqr(t[0], t[0]); */              /*  244: 
680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd98 */\\\n/* sqr(t[0], t[0]); */              /*  245: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb30 */\\\n/* sqr(t[0], t[0]); */              /*  246: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf660 */\\\nsqr_n_mul(t[0], t[0], 5, t[10]);    /*  247: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf673 */\\\n/* sqr(t[0], t[0]); */              /*  248: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece6 */\\\n/* sqr(t[0], t[0]); */              /*  249: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc */\\\n/* sqr(t[0], t[0]); */              /*  250: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398 */\\\n/* sqr(t[0], t[0]); */              /*  251: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730 */\\\n/* sqr(t[0], t[0]); */              /*  252: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece60 */\\\n/* sqr(t[0], t[0]); */              /*  253: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc0 */\\\n/* sqr(t[0], t[0]); */              /*  254: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3980 */\\\n/* sqr(t[0], t[0]); */              /*  255: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf67300 */\\\nsqr_n_mul(t[0], t[0], 8, t[3]);     /*  256: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d */\\\n/* sqr(t[0], t[0]); */              /*  257: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a */\\\n/* sqr(t[0], t[0]); */              /*  258: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34 */\\\n/* sqr(t[0], t[0]); */              /*  259: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39868 */\\\n/* sqr(t[0], t[0]); */              /*  260: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d0 */\\\n/* sqr(t[0], t[0]); */              /*  261: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a0 */\\\n/* sqr(t[0], t[0]); */              /*  262: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc340 */\\\n/* sqr(t[0], t[0]); */              /*  263: 
d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398680 */\\\nsqr_n_mul(t[0], t[0], 7, t[2]);     /*  264: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695 */\\\n/* sqr(t[0], t[0]); */              /*  265: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a */\\\n/* sqr(t[0], t[0]); */              /*  266: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a54 */\\\n/* sqr(t[0], t[0]); */              /*  267: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a8 */\\\n/* sqr(t[0], t[0]); */              /*  268: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb3986950 */\\\n/* sqr(t[0], t[0]); */              /*  269: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0 */\\\n/* sqr(t[0], t[0]); */              /*  270: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a540 */\\\n/* sqr(t[0], t[0]); */              /*  271: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a80 */\\\n/* sqr(t[0], t[0]); */              /*  272: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869500 */\\\n/* sqr(t[0], t[0]); */              /*  273: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a00 */\\\nsqr_n_mul(t[0], t[0], 9, t[7]);     /*  274: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f */\\\n/* sqr(t[0], t[0]); */              /*  275: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e */\\\n/* sqr(t[0], t[0]); */              /*  276: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83c */\\\n/* sqr(t[0], t[0]); */              /*  277: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb398695078 */\\\n/* sqr(t[0], t[0]); */              /*  278: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f0 */\\\n/* sqr(t[0], t[0]); */              /*  279: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[3]);     /*  280: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed */\\\n/* sqr(t[0], t[0]); */              /*  281: 
680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83da */\\\n/* sqr(t[0], t[0]); */              /*  282: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b4 */\\\n/* sqr(t[0], t[0]); */              /*  283: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f68 */\\\nsqr_n_mul(t[0], t[0], 3, t[8]);     /*  284: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b */\\\n/* sqr(t[0], t[0]); */              /*  285: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed6 */\\\n/* sqr(t[0], t[0]); */              /*  286: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac */\\\n/* sqr(t[0], t[0]); */              /*  287: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b58 */\\\n/* sqr(t[0], t[0]); */              /*  288: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0 */\\\n/* sqr(t[0], t[0]); */              /*  289: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed60 */\\\n/* sqr(t[0], t[0]); */              /*  290: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac0 */\\\n/* sqr(t[0], t[0]); */              /*  291: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b580 */\\\n/* sqr(t[0], t[0]); */              /*  292: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b00 */\\\nsqr_n_mul(t[0], t[0], 8, t[7]);     /*  293: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f */\\\n/* sqr(t[0], t[0]); */              /*  294: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61e */\\\n/* sqr(t[0], t[0]); */              /*  295: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3c */\\\n/* sqr(t[0], t[0]); */              /*  296: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b5878 */\\\nsqr_n_mul(t[0], t[0], 3, t[8]);     /*  297: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b */\\\n/* sqr(t[0], t[0]); */              /*  298: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6 */\\\n/* sqr(t[0], t[0]); */        
      /*  299: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec */\\\n/* sqr(t[0], t[0]); */              /*  300: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8 */\\\n/* sqr(t[0], t[0]); */              /*  301: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b0 */\\\n/* sqr(t[0], t[0]); */              /*  302: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f60 */\\\n/* sqr(t[0], t[0]); */              /*  303: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec0 */\\\n/* sqr(t[0], t[0]); */              /*  304: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d80 */\\\nsqr_n_mul(t[0], t[0], 7, t[9]);     /*  305: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89 */\\\n/* sqr(t[0], t[0]); */              /*  306: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b12 */\\\n/* sqr(t[0], t[0]); */              /*  307: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f624 */\\\n/* sqr(t[0], t[0]); */              /*  308: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec48 */\\\n/* sqr(t[0], t[0]); */              /*  309: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d890 */\\\n/* sqr(t[0], t[0]); */              /*  310: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120 */\\\n/* sqr(t[0], t[0]); */              /*  311: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6240 */\\\n/* sqr(t[0], t[0]); */              /*  312: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec480 */\\\n/* sqr(t[0], t[0]); */              /*  313: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8900 */\\\n/* sqr(t[0], t[0]); */              /*  314: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b1200 */\\\nsqr_n_mul(t[0], t[0], 9, t[7]);     /*  315: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f */\\\n/* sqr(t[0], t[0]); */              /*  316: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e */\\\n/* sqr(t[0], t[0]); */              /*  317: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c */\\\n/* sqr(t[0], t[0]); */              /*  318: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d89078 */\\\n/* sqr(t[0], t[0]); */              /*  319: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f0 */\\\n/* sqr(t[0], t[0]); */              /*  320: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241e0 */\\\n/* sqr(t[0], t[0]); */              /*  321: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483c0 */\\\nsqr_n_mul(t[0], t[0], 6, t[2]);     /*  322: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d5 */\\\n/* sqr(t[0], t[0]); */              /*  323: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa */\\\n/* sqr(t[0], t[0]); */              /*  324: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f54 */\\\n/* sqr(t[0], t[0]); */              /*  325: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241ea8 */\\\n/* sqr(t[0], t[0]); */              /*  326: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d50 */\\\n/* sqr(t[0], t[0]); */              /*  327: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aa0 */\\\n/* sqr(t[0], t[0]); */              /*  328: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f540 */\\\nsqr_n_mul(t[0], t[0], 6, t[4]);     /*  329: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f */\\\n/* sqr(t[0], t[0]); */              /*  330: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe */\\\n/* sqr(t[0], t[0]); */              /*  331: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57c */\\\n/* sqr(t[0], t[0]); */              /*  332: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaf8 */\\\n/* sqr(t[0], 
t[0]); */              /*  333: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55f0 */\\\n/* sqr(t[0], t[0]); */              /*  334: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  335: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff */\\\n/* sqr(t[0], t[0]); */              /*  336: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe */\\\n/* sqr(t[0], t[0]); */              /*  337: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffc */\\\n/* sqr(t[0], t[0]); */              /*  338: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ff8 */\\\n/* sqr(t[0], t[0]); */              /*  339: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabff0 */\\\n/* sqr(t[0], t[0]); */              /*  340: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  341: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff */\\\n/* sqr(t[0], t[0]); */              /*  342: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aafffe */\\\n/* sqr(t[0], t[0]); */              /*  343: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55fffc */\\\n/* sqr(t[0], t[0]); */              /*  344: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfff8 */\\\n/* sqr(t[0], t[0]); */              /*  345: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[3]);     /*  346: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd */\\\n/* sqr(t[0], t[0]); */              /*  347: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffa */\\\n/* sqr(t[0], t[0]); */              /*  348: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff4 */\\\n/* sqr(t[0], t[0]); */              /*  349: 
1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffe8 */\\\nsqr_n_mul(t[0], t[0], 3, t[8]);     /*  350: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb */\\\n/* sqr(t[0], t[0]); */              /*  351: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd6 */\\\n/* sqr(t[0], t[0]); */              /*  352: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac */\\\n/* sqr(t[0], t[0]); */              /*  353: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58 */\\\n/* sqr(t[0], t[0]); */              /*  354: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb0 */\\\n/* sqr(t[0], t[0]); */              /*  355: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd60 */\\\n/* sqr(t[0], t[0]); */              /*  356: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac0 */\\\n/* sqr(t[0], t[0]); */              /*  357: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff580 */\\\n/* sqr(t[0], t[0]); */              /*  358: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb00 */\\\nsqr_n_mul(t[0], t[0], 8, t[2]);     /*  359: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb15 */\\\n/* sqr(t[0], t[0]); */              /*  360: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a */\\\n/* sqr(t[0], t[0]); */              /*  361: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54 */\\\n/* sqr(t[0], t[0]); */              /*  362: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a8 */\\\n/* sqr(t[0], t[0]); */              /*  363: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb150 */\\\n/* sqr(t[0], t[0]); */              /*  364: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a0 */\\\n/* sqr(t[0], t[0]); */   
           /*  365: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac540 */\\\n/* sqr(t[0], t[0]); */              /*  366: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a80 */\\\nsqr_n_mul(t[0], t[0], 7, t[4]);     /*  367: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f */\\\n/* sqr(t[0], t[0]); */              /*  368: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e */\\\n/* sqr(t[0], t[0]); */              /*  369: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7c */\\\n/* sqr(t[0], t[0]); */              /*  370: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54f8 */\\\n/* sqr(t[0], t[0]); */              /*  371: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9f0 */\\\n/* sqr(t[0], t[0]); */              /*  372: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  373: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff */\\\n/* sqr(t[0], t[0]); */              /*  374: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe */\\\n/* sqr(t[0], t[0]); */              /*  375: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffc */\\\n/* sqr(t[0], t[0]); */              /*  376: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ff8 */\\\n/* sqr(t[0], t[0]); */              /*  377: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ff0 */\\\n/* sqr(t[0], t[0]); */              /*  378: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  379: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff */\\\n/* sqr(t[0], t[0]); */              /*  380: 
680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54fffe */\\\n/* sqr(t[0], t[0]); */              /*  381: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9fffc */\\\n/* sqr(t[0], t[0]); */              /*  382: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153fff8 */\\\n/* sqr(t[0], t[0]); */              /*  383: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7fff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[7]);     /*  384: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff */\\\n/* sqr(t[0], t[0]); */              /*  385: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffe */\\\n/* sqr(t[0], t[0]); */              /*  386: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffc */\\\n/* sqr(t[0], t[0]); */              /*  387: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffff8 */\\\n/* sqr(t[0], t[0]); */              /*  388: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[6]);     /*  389: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff7 */\\\n/* sqr(t[0], t[0]); */              /*  390: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee */\\\n/* sqr(t[0], t[0]); */              /*  391: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc */\\\n/* sqr(t[0], t[0]); */              /*  392: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb8 */\\\n/* sqr(t[0], t[0]); */              /*  393: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff70 */\\\n/* sqr(t[0], t[0]); */              /*  394: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee0 */\\\n/* sqr(t[0], t[0]); */              /*  395: 
d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdc0 */\\\n/* sqr(t[0], t[0]); */              /*  396: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb80 */\\\nsqr_n_mul(t[0], t[0], 7, t[4]);     /*  397: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f */\\\n/* sqr(t[0], t[0]); */              /*  398: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e */\\\n/* sqr(t[0], t[0]); */              /*  399: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7c */\\\n/* sqr(t[0], t[0]); */              /*  400: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcf8 */\\\n/* sqr(t[0], t[0]); */              /*  401: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9f0 */\\\n/* sqr(t[0], t[0]); */              /*  402: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[5]);     /*  403: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd */\\\n/* sqr(t[0], t[0]); */              /*  404: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa */\\\n/* sqr(t[0], t[0]); */              /*  405: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff4 */\\\n/* sqr(t[0], t[0]); */              /*  406: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fe8 */\\\n/* sqr(t[0], t[0]); */              /*  407: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fd0 */\\\n/* sqr(t[0], t[0]); */              /*  408: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fa0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  409: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf */\\\n/* 
sqr(t[0], t[0]); */              /*  410: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e */\\\n/* sqr(t[0], t[0]); */              /*  411: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefc */\\\n/* sqr(t[0], t[0]); */              /*  412: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdf8 */\\\n/* sqr(t[0], t[0]); */              /*  413: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbf0 */\\\n/* sqr(t[0], t[0]); */              /*  414: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7e0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  415: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff */\\\n/* sqr(t[0], t[0]); */              /*  416: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe */\\\n/* sqr(t[0], t[0]); */              /*  417: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffc */\\\n/* sqr(t[0], t[0]); */              /*  418: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbff8 */\\\n/* sqr(t[0], t[0]); */              /*  419: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ff0 */\\\n/* sqr(t[0], t[0]); */              /*  420: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  421: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff */\\\n/* sqr(t[0], t[0]); */              /*  422: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe */\\\n/* sqr(t[0], t[0]); */              /*  423: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffc */\\\n/* sqr(t[0], t[0]); */              /*  424: 
d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fff8 */\\\n/* sqr(t[0], t[0]); */              /*  425: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffff0 */\\\n/* sqr(t[0], t[0]); */              /*  426: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  427: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff */\\\n/* sqr(t[0], t[0]); */              /*  428: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe */\\\n/* sqr(t[0], t[0]); */              /*  429: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7ffffc */\\\n/* sqr(t[0], t[0]); */              /*  430: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffff8 */\\\n/* sqr(t[0], t[0]); */              /*  431: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffff0 */\\\n/* sqr(t[0], t[0]); */              /*  432: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  433: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff */\\\n/* sqr(t[0], t[0]); */              /*  434: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe */\\\n/* sqr(t[0], t[0]); */              /*  435: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffc */\\\n/* sqr(t[0], t[0]); */              /*  436: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffff8 */\\\n/* sqr(t[0], t[0]); */              /*  437: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbffffff0 */\\\n/* sqr(t[0], t[0]); */              /*  438: 
d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffe0 */\\\nsqr_n_mul(t[0], t[0], 5, t[4]);     /*  439: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff */\\\n/* sqr(t[0], t[0]); */              /*  440: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9fefffffffe */\\\n/* sqr(t[0], t[0]); */              /*  441: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdfffffffc */\\\n/* sqr(t[0], t[0]); */              /*  442: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffff8 */\\\n/* sqr(t[0], t[0]); */              /*  443: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffff0 */\\\nsqr_n_mul(t[0], t[0], 4, t[3]);     /*  444: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd */\\\n/* sqr(t[0], t[0]); */              /*  445: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa */\\\n/* sqr(t[0], t[0]); */              /*  446: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff4 */\\\n/* sqr(t[0], t[0]); */              /*  447: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffe8 */\\\n/* sqr(t[0], t[0]); */              /*  448: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd0 */\\\n/* sqr(t[0], t[0]); */              /*  449: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffa0 */\\\n/* sqr(t[0], t[0]); */              /*  450: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff40 */\\\nsqr_n_mul(t[0], t[0], 6, t[2]);     /*  451: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff55 */\\\n/* sqr(t[0], t[0]); 
*/              /*  452: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaa */\\\n/* sqr(t[0], t[0]); */              /*  453: d0088f51cbff34d258dd3db21a5d66bb23ba5c279c2895fb39869507b587b120f55ffff58a9ffffdcff7fffffffd54 */\\\n/* sqr(t[0], t[0]); */              /*  454: 1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaa8 */\\\n/* sqr(t[0], t[0]); */              /*  455: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff550 */\\\nsqr_n_mul(t[0], t[0], 4, t[1]);     /*  456: 340223d472ffcd3496374f6c869759aec8ee9709e70a257ece61a541ed61ec483d57fffd62a7ffff73fdffffffff555 */\\\nsqr(out, t[0]);                     /*  457: 680447a8e5ff9a692c6e9ed90d2eb35d91dd2e13ce144afd9cc34a83dac3d8907aaffffac54ffffee7fbfffffffeaaa */\\\n} while(0)\n"
  },
  {
    "path": "src/sqrt.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"fields.h\"\n\n#ifdef __OPTIMIZE_SIZE__\nstatic void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp)\n{\n    static const byte BLS_12_381_P_minus_3_div_4[] = {\n        TO_BYTES(0xee7fbfffffffeaaa), TO_BYTES(0x07aaffffac54ffff),\n        TO_BYTES(0xd9cc34a83dac3d89), TO_BYTES(0xd91dd2e13ce144af),\n        TO_BYTES(0x92c6e9ed90d2eb35), TO_BYTES(0x0680447a8e5ff9a6)\n    };\n\n    exp_mont_384(out, inp, BLS_12_381_P_minus_3_div_4, 379, BLS12_381_P, p0);\n}\n#else\n# if 1\n/*\n * \"383\"-bit variant omits full reductions at the ends of squarings,\n * which results in up to ~15% improvement. [One can improve further\n * by omitting full reductions even after multiplications and\n * performing final reduction at the very end of the chain.]\n */\nstatic inline void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count,\n                                const vec384 b)\n{   sqr_n_mul_mont_383(out, a, count, BLS12_381_P, p0, b);   }\n# else\nstatic void sqr_n_mul_fp(vec384 out, const vec384 a, size_t count,\n                         const vec384 b)\n{\n    while(count--) {\n        sqr_fp(out, a);\n        a = out;\n    }\n    mul_fp(out, out, b);\n}\n# endif\n\n# define sqr(ret,a)\t\tsqr_fp(ret,a)\n# define mul(ret,a,b)\t\tmul_fp(ret,a,b)\n# define sqr_n_mul(ret,a,n,b)\tsqr_n_mul_fp(ret,a,n,b)\n\n# include \"sqrt-addchain.h\"\nstatic void recip_sqrt_fp_3mod4(vec384 out, const vec384 inp)\n{\n    RECIP_SQRT_MOD_BLS12_381_P(out, inp, vec384);\n}\n# undef RECIP_SQRT_MOD_BLS12_381_P\n\n# undef sqr_n_mul\n# undef sqr\n# undef mul\n#endif\n\nstatic bool_t recip_sqrt_fp(vec384 out, const vec384 inp)\n{\n    vec384 t0, t1;\n    bool_t ret;\n\n    recip_sqrt_fp_3mod4(t0, inp);\n\n    mul_fp(t1, t0, inp);\n    sqr_fp(t1, t1);\n    ret = vec_is_equal(t1, inp, sizeof(t1));\n    vec_copy(out, t0, 
sizeof(t0));\n\n    return ret;\n}\n\nstatic bool_t sqrt_fp(vec384 out, const vec384 inp)\n{\n    vec384 t0, t1;\n    bool_t ret;\n\n    recip_sqrt_fp_3mod4(t0, inp);\n\n    mul_fp(t0, t0, inp);\n    sqr_fp(t1, t0);\n    ret = vec_is_equal(t1, inp, sizeof(t1));\n    vec_copy(out, t0, sizeof(t0));\n\n    return ret;\n}\n\nint blst_fp_sqrt(vec384 out, const vec384 inp)\n{   return (int)sqrt_fp(out, inp);   }\n\nint blst_fp_is_square(const vec384 inp)\n{\n    return (int)ct_is_square_mod_384(inp, BLS12_381_P);\n}\n\nstatic bool_t sqrt_align_fp2(vec384x out, const vec384x ret,\n                             const vec384x sqrt, const vec384x inp)\n{\n    static const vec384x sqrt_minus_1 = { { 0 }, { ONE_MONT_P } };\n    static const vec384x sqrt_sqrt_minus_1 = {\n      /*\n       * \"magic\" number is ±2^((p-3)/4)%p, which is \"1/sqrt(2)\",\n       * in quotes because 2*\"1/sqrt(2)\"^2 == -1 mod p, not 1,\n       * but it pivots into \"complex\" plane nevertheless...\n       */\n      { TO_LIMB_T(0x3e2f585da55c9ad1), TO_LIMB_T(0x4294213d86c18183),\n        TO_LIMB_T(0x382844c88b623732), TO_LIMB_T(0x92ad2afd19103e18),\n        TO_LIMB_T(0x1d794e4fac7cf0b9), TO_LIMB_T(0x0bd592fc7d825ec8) },\n      { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),\n        TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),\n        TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }\n    };\n    static const vec384x sqrt_minus_sqrt_minus_1 = {\n      { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),\n        TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),\n        TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) },\n      { TO_LIMB_T(0x7bcfa7a25aa30fda), TO_LIMB_T(0xdc17dec12a927e7c),\n        TO_LIMB_T(0x2f088dd86b4ebef1), TO_LIMB_T(0xd1ca2087da74d4a7),\n        TO_LIMB_T(0x2da2596696cebc1d), TO_LIMB_T(0x0e2b7eedbbfd87d2) }\n    };\n    vec384x coeff, t0, t1;\n    bool_t is_sqrt, flag;\n\n    /*\n     * Instead of 
multiple trial squarings we can perform just one\n     * and see if the result is \"rotated by multiple of 90°\" in\n     * relation to |inp|, and \"rotate\" |ret| accordingly.\n     */\n    sqr_fp2(t0, sqrt);\n    /* \"sqrt(|inp|)\"^2 = (a + b*i)^2 = (a^2-b^2) + 2ab*i */\n\n    /* (a^2-b^2) + 2ab*i == |inp| ? |ret| is spot on */\n    sub_fp2(t1, t0, inp);\n    is_sqrt = vec_is_zero(t1, sizeof(t1));\n    vec_copy(coeff, BLS12_381_Rx.p2, sizeof(coeff));\n\n    /* -(a^2-b^2) - 2ab*i == |inp| ? \"rotate |ret| by 90°\" */\n    add_fp2(t1, t0, inp);\n    vec_select(coeff, sqrt_minus_1, coeff, sizeof(coeff),\n               flag = vec_is_zero(t1, sizeof(t1)));\n    is_sqrt |= flag;\n\n    /* 2ab - (a^2-b^2)*i == |inp| ? \"rotate |ret| by 135°\" */\n    sub_fp(t1[0], t0[0], inp[1]);\n    add_fp(t1[1], t0[1], inp[0]);\n    vec_select(coeff, sqrt_sqrt_minus_1, coeff, sizeof(coeff),\n               flag = vec_is_zero(t1, sizeof(t1)));\n    is_sqrt |= flag;\n\n    /* -2ab + (a^2-b^2)*i == |inp| ? \"rotate |ret| by 45°\" */\n    add_fp(t1[0], t0[0], inp[1]);\n    sub_fp(t1[1], t0[1], inp[0]);\n    vec_select(coeff, sqrt_minus_sqrt_minus_1, coeff, sizeof(coeff),\n               flag = vec_is_zero(t1, sizeof(t1)));\n    is_sqrt |= flag;\n\n    /* actual \"rotation\" */\n    mul_fp2(out, ret, coeff);\n\n    return is_sqrt;\n}\n\n/*\n * |inp| = a + b*i\n */\nstatic bool_t recip_sqrt_fp2(vec384x out, const vec384x inp,\n                                          const vec384x recip_ZZZ,\n                                          const vec384x magic_ZZZ)\n{\n    vec384 aa, bb, cc;\n    vec384x inp_;\n    bool_t is_sqrt;\n\n    sqr_fp(aa, inp[0]);\n    sqr_fp(bb, inp[1]);\n    add_fp(aa, aa, bb);\n\n    is_sqrt = recip_sqrt_fp(cc, aa);  /* 1/sqrt(a²+b²)                    */\n\n    /* if |inp| doesn't have quadratic residue, multiply by \"1/Z³\" ...    */\n    mul_fp2(inp_, inp, recip_ZZZ);\n    /* ... 
and adjust |aa| and |cc| accordingly                           */\n    {\n        vec384 za, zc;\n\n        mul_fp(za, aa, magic_ZZZ[0]); /* aa*(za² + zb²)                   */\n        mul_fp(zc, cc, magic_ZZZ[1]); /* cc*(za² + zb²)^((p-3)/4)         */\n        vec_select(aa, aa, za, sizeof(aa), is_sqrt);\n        vec_select(cc, cc, zc, sizeof(cc), is_sqrt);\n    }\n    vec_select(inp_, inp, inp_, sizeof(inp_), is_sqrt);\n\n    mul_fp(aa, aa, cc);               /* sqrt(a²+b²)                      */\n\n    sub_fp(bb, inp_[0], aa);\n    add_fp(aa, inp_[0], aa);\n    vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa)));\n    div_by_2_fp(aa, aa);              /* (a ± sqrt(a²+b²))/2              */\n\n    /* if it says \"no sqrt,\" final \"align\" will find right one...         */\n    (void)recip_sqrt_fp(out[0], aa);  /* 1/sqrt((a ± sqrt(a²+b²))/2)      */\n\n    div_by_2_fp(out[1], inp_[1]);\n    mul_fp(out[1], out[1], out[0]);   /* b/(2*sqrt((a ± sqrt(a²+b²))/2))  */\n    mul_fp(out[0], out[0], aa);       /* sqrt((a ± sqrt(a²+b²))/2)        */\n\n    /* bound to succeed                                                   */\n    (void)sqrt_align_fp2(out, out, out, inp_);\n\n    mul_fp(out[0], out[0], cc);       /* inverse the result               */\n    mul_fp(out[1], out[1], cc);\n    neg_fp(out[1], out[1]);\n\n    return is_sqrt;\n}\n\nstatic bool_t sqrt_fp2(vec384x out, const vec384x inp)\n{\n    vec384x ret;\n    vec384 aa, bb;\n\n    sqr_fp(aa, inp[0]);\n    sqr_fp(bb, inp[1]);\n    add_fp(aa, aa, bb);\n\n    /* don't pay attention to return value, final \"align\" will tell...    */\n    (void)sqrt_fp(aa, aa);            /* sqrt(a²+b²)                      */\n\n    sub_fp(bb, inp[0], aa);\n    add_fp(aa, inp[0], aa);\n    vec_select(aa, bb, aa, sizeof(aa), vec_is_zero(aa, sizeof(aa)));\n    div_by_2_fp(aa, aa);              /* (a ± sqrt(a²+b²))/2              */\n\n    /* if it says \"no sqrt,\" final \"align\" will find right one...         
*/\n    (void)recip_sqrt_fp(ret[0], aa);  /* 1/sqrt((a ± sqrt(a²+b²))/2)      */\n\n    div_by_2_fp(ret[1], inp[1]);\n    mul_fp(ret[1], ret[1], ret[0]);   /* b/(2*sqrt((a ± sqrt(a²+b²))/2))  */\n    mul_fp(ret[0], ret[0], aa);       /* sqrt((a ± sqrt(a²+b²))/2)        */\n\n    /*\n     * Now see if |ret| is or can be made sqrt(|inp|)...\n     */\n\n    return sqrt_align_fp2(out, ret, ret, inp);\n}\n\nint blst_fp2_sqrt(vec384x out, const vec384x inp)\n{   return (int)sqrt_fp2(out, inp);   }\n\nint blst_fp2_is_square(const vec384x inp)\n{\n    vec384 aa, bb;\n\n    sqr_fp(aa, inp[0]);\n    sqr_fp(bb, inp[1]);\n    add_fp(aa, aa, bb);\n\n    return (int)ct_is_square_mod_384(aa, BLS12_381_P);\n}\n"
  },
  {
    "path": "src/vect.c",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n\n#include \"vect.h\"\n\n#ifdef __BLST_NO_ASM__\n# include \"no_asm.h\"\n#endif\n\n/*\n * Following are some reference C implementations to assist new\n * assembly modules development, as starting-point stand-ins and for\n * cross-checking. In order to \"polyfil\" specific subroutine redefine\n * it on compiler command line, e.g. -Dmul_mont_384x=_mul_mont_384x.\n */\n\n#ifdef lshift_mod_384\ninline void lshift_mod_384(vec384 ret, const vec384 a, size_t n,\n                           const vec384 mod)\n{\n    while(n--)\n        add_mod_384(ret, a, a, mod), a = ret;\n}\n#endif\n\n#ifdef mul_by_8_mod_384\ninline void mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 mod)\n{   lshift_mod_384(ret, a, 3, mod);   }\n#endif\n\n#ifdef mul_by_3_mod_384\ninline void mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 mod)\n{\n    vec384 t;\n\n    add_mod_384(t, a, a, mod);\n    add_mod_384(ret, t, a, mod);\n}\n#endif\n\n#ifdef mul_by_3_mod_384x\ninline void mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 mod)\n{\n    mul_by_3_mod_384(ret[0], a[0], mod);\n    mul_by_3_mod_384(ret[1], a[1], mod);\n}\n#endif\n\n#ifdef mul_by_8_mod_384x\ninline void mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 mod)\n{\n    mul_by_8_mod_384(ret[0], a[0], mod);\n    mul_by_8_mod_384(ret[1], a[1], mod);\n}\n#endif\n\n#ifdef mul_by_1_plus_i_mod_384x\ninline void mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a,\n                                     const vec384 mod)\n{\n    vec384 t;\n\n    add_mod_384(t, a[0], a[1], mod);\n    sub_mod_384(ret[0], a[0], a[1], mod);\n    vec_copy(ret[1], t, sizeof(t));\n}\n#endif\n\n#ifdef add_mod_384x\ninline void add_mod_384x(vec384x ret, const vec384x a, const vec384x b,\n                         const vec384 mod)\n{\n    add_mod_384(ret[0], 
a[0], b[0], mod);\n    add_mod_384(ret[1], a[1], b[1], mod);\n}\n#endif\n\n#ifdef sub_mod_384x\ninline void sub_mod_384x(vec384x ret, const vec384x a, const vec384x b,\n                         const vec384 mod)\n{\n    sub_mod_384(ret[0], a[0], b[0], mod);\n    sub_mod_384(ret[1], a[1], b[1], mod);\n}\n#endif\n\n#ifdef lshift_mod_384x\ninline void lshift_mod_384x(vec384x ret, const vec384x a, size_t n,\n                            const vec384 mod)\n{\n    lshift_mod_384(ret[0], a[0], n, mod);\n    lshift_mod_384(ret[1], a[1], n, mod);\n}\n#endif\n\n#if defined(mul_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__))\nvoid mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,\n                   const vec384 mod, limb_t n0)\n{\n    vec768 t0, t1, t2;\n    vec384 aa, bb;\n\n    mul_384(t0, a[0], b[0]);\n    mul_384(t1, a[1], b[1]);\n\n    add_mod_384(aa, a[0], a[1], mod);\n    add_mod_384(bb, b[0], b[1], mod);\n    mul_384(t2, aa, bb);\n    sub_mod_384x384(t2, t2, t0, mod);\n    sub_mod_384x384(t2, t2, t1, mod);\n\n    sub_mod_384x384(t0, t0, t1, mod);\n\n    redc_mont_384(ret[0], t0, mod, n0);\n    redc_mont_384(ret[1], t2, mod, n0);\n}\n#endif\n\n#if defined(sqr_mont_384x) && !(defined(__ADX__) && !defined(__BLST_PORTABLE__))\nvoid sqr_mont_384x(vec384x ret, const vec384x a, const vec384 mod, limb_t n0)\n{\n    vec384 t0, t1;\n\n    add_mod_384(t0, a[0], a[1], mod);\n    sub_mod_384(t1, a[0], a[1], mod);\n\n    mul_mont_384(ret[1], a[0], a[1], mod, n0);\n    add_mod_384(ret[1], ret[1], ret[1], mod);\n\n    mul_mont_384(ret[0], t0, t1, mod, n0);\n}\n#endif\n\nlimb_t div_3_limbs(const limb_t dividend_top[2], limb_t d_lo, limb_t d_hi);\nlimb_t quot_rem_128(limb_t *quot_rem, const limb_t *divisor, limb_t quotient);\nlimb_t quot_rem_64(limb_t *quot_rem, const limb_t *divisor, limb_t quotient);\n\n/*\n * Divide 255-bit |val| by z^2 yielding 128-bit quotient and remainder in place.\n */\nstatic void div_by_zz(limb_t val[])\n{\n    static const limb_t 
zz[] = { TO_LIMB_T(0x0000000100000000),\n                                 TO_LIMB_T(0xac45a4010001a402) };\n    size_t loop, zz_len = sizeof(zz)/sizeof(zz[0]);\n    limb_t d_lo, d_hi;\n\n    d_lo = zz[zz_len - 2];\n    d_hi = zz[zz_len - 1];\n    for (loop = zz_len, zz_len--; loop--;) {\n        limb_t q = div_3_limbs(val + loop + zz_len, d_lo, d_hi);\n        (void)quot_rem_128(val + loop, zz, q);\n    }\n    /* remainder is in low half of val[], quotient is in high */\n}\n\n/*\n * Divide 128-bit |val| by z yielding 64-bit quotient and remainder in place.\n */\nstatic void div_by_z(limb_t val[])\n{\n    static const limb_t z[] = { TO_LIMB_T(0xd201000000010000) };\n    size_t loop, z_len = sizeof(z)/sizeof(z[0]);\n    limb_t d_lo, d_hi;\n\n    d_lo = (sizeof(z) == sizeof(limb_t)) ? 0 : z[z_len - 2];\n    d_hi = z[z_len - 1];\n    for (loop = z_len, z_len--; loop--;) {\n        limb_t q = div_3_limbs(val + loop + z_len, d_lo, d_hi);\n        (void)quot_rem_64(val + loop, z, q);\n    }\n    /* remainder is in low half of val[], quotient is in high */\n}\n"
  },
  {
    "path": "src/vect.h",
    "content": "/*\n * Copyright Supranational LLC\n * Licensed under the Apache License, Version 2.0, see LICENSE for details.\n * SPDX-License-Identifier: Apache-2.0\n */\n#ifndef __BLS12_381_ASM_VECT_H__\n#define __BLS12_381_ASM_VECT_H__\n\n#include <stddef.h>\n\n#if defined(__x86_64__) || defined(__aarch64__)\n/* These are available even in ILP32 flavours, but even then they are\n * capable of performing 64-bit operations as efficiently as in *P64. */\ntypedef unsigned long long limb_t;\n# define LIMB_T_BITS    64\n\n#elif defined(_WIN64)   /* Win64 is P64 */\ntypedef unsigned __int64 limb_t;\n# define LIMB_T_BITS    64\n\n#elif defined(__BLST_NO_ASM__) || defined(__wasm64__)\ntypedef unsigned int limb_t;\n# define LIMB_T_BITS    32\n# ifndef __BLST_NO_ASM__\n#  define __BLST_NO_ASM__\n# endif\n\n#else                   /* 32 bits on 32-bit platforms, 64 - on 64-bit */\ntypedef unsigned long limb_t;\n#  ifdef _LP64\n#   define LIMB_T_BITS   64\n#  else\n#   define LIMB_T_BITS   32\n#   define __BLST_NO_ASM__\n#  endif\n#endif\n\n/*\n * Why isn't LIMB_T_BITS defined as 8*sizeof(limb_t)? 
Because pre-processor\n * knows nothing about sizeof(anything)...\n */\n#if LIMB_T_BITS == 64\n# define TO_LIMB_T(limb64)     limb64\n#else\n# define TO_LIMB_T(limb64)     (limb_t)limb64,(limb_t)(limb64>>32)\n#endif\n\n#define NLIMBS(bits)   (bits/LIMB_T_BITS)\n\ntypedef limb_t vec256[NLIMBS(256)];\ntypedef limb_t vec512[NLIMBS(512)];\ntypedef limb_t vec384[NLIMBS(384)];\ntypedef limb_t vec768[NLIMBS(768)];\ntypedef vec384 vec384x[2];      /* 0 is \"real\" part, 1 is \"imaginary\" */\n\ntypedef unsigned char byte;\n#define TO_BYTES(limb64)    (byte)limb64,(byte)(limb64>>8),\\\n                            (byte)(limb64>>16),(byte)(limb64>>24),\\\n                            (byte)(limb64>>32),(byte)(limb64>>40),\\\n                            (byte)(limb64>>48),(byte)(limb64>>56)\ntypedef byte pow256[256/8];\n\n/*\n * Internal Boolean type, Boolean by value, hence safe to cast to or\n * reinterpret as 'bool'.\n */\ntypedef limb_t bool_t;\n\n/*\n * Assembly subroutines...\n */\n#if defined(__ADX__) /* e.g. 
-march=broadwell */ && !defined(__BLST_PORTABLE__)\\\n                                                 && !defined(__BLST_NO_ASM__)\n# define mul_mont_sparse_256 mulx_mont_sparse_256\n# define sqr_mont_sparse_256 sqrx_mont_sparse_256\n# define from_mont_256 fromx_mont_256\n# define redc_mont_256 redcx_mont_256\n# define mul_mont_384 mulx_mont_384\n# define sqr_mont_384 sqrx_mont_384\n# define sqr_n_mul_mont_384 sqrx_n_mul_mont_384\n# define sqr_n_mul_mont_383 sqrx_n_mul_mont_383\n# define mul_384 mulx_384\n# define sqr_384 sqrx_384\n# define redc_mont_384 redcx_mont_384\n# define from_mont_384 fromx_mont_384\n# define sgn0_pty_mont_384 sgn0x_pty_mont_384\n# define sgn0_pty_mont_384x sgn0x_pty_mont_384x\n# define ct_inverse_mod_384 ctx_inverse_mod_384\n#endif\n\nvoid mul_mont_sparse_256(vec256 ret, const vec256 a, const vec256 b,\n                         const vec256 p, limb_t n0);\nvoid sqr_mont_sparse_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0);\nvoid redc_mont_256(vec256 ret, const vec512 a, const vec256 p, limb_t n0);\nvoid from_mont_256(vec256 ret, const vec256 a, const vec256 p, limb_t n0);\n\nvoid add_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p);\nvoid sub_mod_256(vec256 ret, const vec256 a, const vec256 b, const vec256 p);\nvoid mul_by_3_mod_256(vec256 ret, const vec256 a, const vec256 p);\nvoid cneg_mod_256(vec256 ret, const vec256 a, bool_t flag, const vec256 p);\nvoid lshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p);\nvoid rshift_mod_256(vec256 ret, const vec256 a, size_t count, const vec256 p);\nbool_t eucl_inverse_mod_256(vec256 ret, const vec256 a, const vec256 p,\n                            const vec256 one);\nlimb_t check_mod_256(const pow256 a, const vec256 p);\nlimb_t add_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b,\n                                       const vec256 p);\nlimb_t sub_n_check_mod_256(pow256 ret, const pow256 a, const pow256 b,\n                            
           const vec256 p);\n\nvoid vec_prefetch(const void *ptr, size_t len);\n\nvoid mul_mont_384(vec384 ret, const vec384 a, const vec384 b,\n                  const vec384 p, limb_t n0);\nvoid sqr_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0);\nvoid sqr_n_mul_mont_384(vec384 ret, const vec384 a, size_t count,\n                        const vec384 p, limb_t n0, const vec384 b);\nvoid sqr_n_mul_mont_383(vec384 ret, const vec384 a, size_t count,\n                        const vec384 p, limb_t n0, const vec384 b);\n\nvoid mul_384(vec768 ret, const vec384 a, const vec384 b);\nvoid sqr_384(vec768 ret, const vec384 a);\nvoid redc_mont_384(vec384 ret, const vec768 a, const vec384 p, limb_t n0);\nvoid from_mont_384(vec384 ret, const vec384 a, const vec384 p, limb_t n0);\nlimb_t sgn0_pty_mont_384(const vec384 a, const vec384 p, limb_t n0);\nlimb_t sgn0_pty_mont_384x(const vec384x a, const vec384 p, limb_t n0);\nlimb_t sgn0_pty_mod_384(const vec384 a, const vec384 p);\nlimb_t sgn0_pty_mod_384x(const vec384x a, const vec384 p);\n\nvoid add_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p);\nvoid sub_mod_384(vec384 ret, const vec384 a, const vec384 b, const vec384 p);\nvoid mul_by_8_mod_384(vec384 ret, const vec384 a, const vec384 p);\nvoid mul_by_3_mod_384(vec384 ret, const vec384 a, const vec384 p);\nvoid cneg_mod_384(vec384 ret, const vec384 a, bool_t flag, const vec384 p);\nvoid lshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p);\nvoid rshift_mod_384(vec384 ret, const vec384 a, size_t count, const vec384 p);\nvoid div_by_2_mod_384(vec384 ret, const vec384 a, const vec384 p);\nvoid ct_inverse_mod_384(vec768 ret, const vec384 inp, const vec384 mod,\n                                                      const vec384 modx);\nvoid ct_inverse_mod_256(vec512 ret, const vec256 inp, const vec256 mod,\n                                                      const vec256 modx);\nbool_t ct_is_square_mod_384(const vec384 inp, 
const vec384 mod);\n\n#if defined(__ADX__) /* e.g. -march=broadwell */ && !defined(__BLST_PORTABLE__)\n# define mul_mont_384x mulx_mont_384x\n# define sqr_mont_384x sqrx_mont_384x\n# define sqr_mont_382x sqrx_mont_382x\n# define mul_382x mulx_382x\n# define sqr_382x sqrx_382x\n#endif\n\nvoid mul_mont_384x(vec384x ret, const vec384x a, const vec384x b,\n                   const vec384 p, limb_t n0);\nvoid sqr_mont_384x(vec384x ret, const vec384x a, const vec384 p, limb_t n0);\nvoid sqr_mont_382x(vec384x ret, const vec384x a, const vec384 p, limb_t n0);\nvoid mul_382x(vec768 ret[2], const vec384x a, const vec384x b, const vec384 p);\nvoid sqr_382x(vec768 ret[2], const vec384x a, const vec384 p);\n\nvoid add_mod_384x(vec384x ret, const vec384x a, const vec384x b,\n                  const vec384 p);\nvoid sub_mod_384x(vec384x ret, const vec384x a, const vec384x b,\n                  const vec384 p);\nvoid mul_by_8_mod_384x(vec384x ret, const vec384x a, const vec384 p);\nvoid mul_by_3_mod_384x(vec384x ret, const vec384x a, const vec384 p);\nvoid mul_by_1_plus_i_mod_384x(vec384x ret, const vec384x a, const vec384 p);\nvoid add_mod_384x384(vec768 ret, const vec768 a, const vec768 b,\n                     const vec384 p);\nvoid sub_mod_384x384(vec768 ret, const vec768 a, const vec768 b,\n                     const vec384 p);\n\n/*\n * C subroutines\n */\nstatic void exp_mont_384(vec384 out, const vec384 inp, const byte *pow,\n                         size_t pow_bits, const vec384 p, limb_t n0);\nstatic void exp_mont_384x(vec384x out, const vec384x inp, const byte *pow,\n                          size_t pow_bits, const vec384 p, limb_t n0);\nstatic void div_by_zz(limb_t val[]);\nstatic void div_by_z(limb_t val[]);\n\n#ifdef __UINTPTR_TYPE__\ntypedef __UINTPTR_TYPE__ uptr_t;\n#else\ntypedef const void *uptr_t;\n#endif\n\n#if !defined(restrict)\n# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901\n#  if defined(__GNUC__) && __GNUC__>=2\n#   define restrict 
__restrict__\n#  elif defined(_MSC_VER)\n#   define restrict __restrict\n#  else\n#   define restrict\n#  endif\n# endif\n#endif\n\n#if !defined(inline) && !defined(__cplusplus)\n# if !defined(__STDC_VERSION__) || __STDC_VERSION__<199901\n#  if defined(__GNUC__) && __GNUC__>=2\n#   define inline __inline__\n#  elif defined(_MSC_VER)\n#   define inline __inline\n#  else\n#   define inline\n#  endif\n# endif\n#endif\n\n#if defined(__GNUC__) || defined(__clang__)\n# define launder(var) __asm__ __volatile__(\"\" : \"+r\"(var))\n#else\n# define launder(var)\n#endif\n\nstatic inline bool_t is_bit_set(const byte *v, size_t i)\n{\n    bool_t ret = (v[i/8] >> (i%8)) & 1;\n    launder(ret);\n    return ret;\n}\n\nstatic inline bool_t byte_is_zero(unsigned char c)\n{\n    limb_t ret = ((limb_t)(c) - 1) >> (LIMB_T_BITS - 1);\n    launder(ret);\n    return ret;\n}\n\nstatic inline bool_t bytes_are_zero(const unsigned char *a, size_t num)\n{\n    unsigned char acc;\n    size_t i;\n\n    for (acc = 0, i = 0; i < num; i++)\n        acc |= a[i];\n\n    return byte_is_zero(acc);\n}\n\nstatic inline void vec_cswap(void *restrict a, void *restrict b, size_t num,\n                             bool_t cbit)\n{\n    limb_t ai, *ap = (limb_t *)a;\n    limb_t bi, *bp = (limb_t *)b;\n    limb_t xorm, mask;\n    size_t i;\n\n    launder(cbit);\n    mask = (limb_t)0 - cbit;\n\n    num /= sizeof(limb_t);\n\n    for (i = 0; i < num; i++) {\n        xorm = ((ai = ap[i]) ^ (bi = bp[i])) & mask;\n        ap[i] = ai ^ xorm;\n        bp[i] = bi ^ xorm;\n    }\n}\n\n/* ret = bit ? 
a : b */\nvoid vec_select_32(void *ret, const void *a, const void *b, bool_t sel_a);\nvoid vec_select_48(void *ret, const void *a, const void *b, bool_t sel_a);\nvoid vec_select_96(void *ret, const void *a, const void *b, bool_t sel_a);\nvoid vec_select_144(void *ret, const void *a, const void *b, bool_t sel_a);\nvoid vec_select_192(void *ret, const void *a, const void *b, bool_t sel_a);\nvoid vec_select_288(void *ret, const void *a, const void *b, bool_t sel_a);\nstatic inline void vec_select(void *ret, const void *a, const void *b,\n                              size_t num, bool_t sel_a)\n{\n    launder(sel_a);\n#ifndef __BLST_NO_ASM__\n    if (num == 32)          vec_select_32(ret, a, b, sel_a);\n    else if (num == 48)     vec_select_48(ret, a, b, sel_a);\n    else if (num == 96)     vec_select_96(ret, a, b, sel_a);\n    else if (num == 144)    vec_select_144(ret, a, b, sel_a);\n    else if (num == 192)    vec_select_192(ret, a, b, sel_a);\n    else if (num == 288)    vec_select_288(ret, a, b, sel_a);\n#else\n    if (0) ;\n#endif\n    else {\n        limb_t bi;\n        volatile limb_t *rp = (limb_t *)ret;\n        const limb_t *ap = (const limb_t *)a;\n        const limb_t *bp = (const limb_t *)b;\n        limb_t xorm, mask = (limb_t)0 - sel_a;\n        size_t i;\n\n        num /= sizeof(limb_t);\n\n        for (i = 0; i < num; i++) {\n            xorm = (ap[i] ^ (bi = bp[i])) & mask;\n            rp[i] = bi ^ xorm;\n        }\n    }\n}\n\nstatic inline bool_t is_zero(limb_t l)\n{\n    limb_t ret = (~l & (l - 1)) >> (LIMB_T_BITS - 1);\n    launder(ret);\n    return ret;\n}\n\nstatic inline bool_t vec_is_zero(const void *a, size_t num)\n{\n    const limb_t *ap = (const limb_t *)a;\n    limb_t acc;\n    size_t i;\n\n#ifndef __BLST_NO_ASM__\n    bool_t vec_is_zero_16x(const void *a, size_t num);\n    if ((num & 15) == 0)\n        return vec_is_zero_16x(a, num);\n#endif\n\n    num /= sizeof(limb_t);\n\n    for (acc = 0, i = 0; i < num; i++)\n        acc |= 
ap[i];\n\n    return is_zero(acc);\n}\n\nstatic inline bool_t vec_is_equal(const void *a, const void *b, size_t num)\n{\n    const limb_t *ap = (const limb_t *)a;\n    const limb_t *bp = (const limb_t *)b;\n    limb_t acc;\n    size_t i;\n\n#ifndef __BLST_NO_ASM__\n    bool_t vec_is_equal_16x(const void *a, const void *b, size_t num);\n    if ((num & 15) == 0)\n        return vec_is_equal_16x(a, b, num);\n#endif\n\n    num /= sizeof(limb_t);\n\n    for (acc = 0, i = 0; i < num; i++)\n        acc |= ap[i] ^ bp[i];\n\n    return is_zero(acc);\n}\n\nstatic inline void cneg_mod_384x(vec384x ret, const vec384x a, bool_t flag,\n                                 const vec384 p)\n{\n    cneg_mod_384(ret[0], a[0], flag, p);\n    cneg_mod_384(ret[1], a[1], flag, p);\n}\n\nstatic inline void vec_copy(void *restrict ret, const void *a, size_t num)\n{\n    limb_t *rp = (limb_t *)ret;\n    const limb_t *ap = (const limb_t *)a;\n    size_t i;\n\n    num /= sizeof(limb_t);\n\n    for (i = 0; i < num; i++)\n        rp[i] = ap[i];\n}\n\nstatic inline void vec_zero(void *ret, size_t num)\n{\n    volatile limb_t *rp = (volatile limb_t *)ret;\n    size_t i;\n\n    num /= sizeof(limb_t);\n\n    for (i = 0; i < num; i++)\n        rp[i] = 0;\n\n#if defined(__GNUC__) || defined(__clang__)\n    __asm__ __volatile__(\"\" : : \"r\"(ret) : \"memory\");\n#endif\n}\n\nstatic inline void vec_czero(void *ret, size_t num, bool_t cbit)\n{\n    limb_t *rp = (limb_t *)ret;\n    size_t i;\n    limb_t mask;\n\n    launder(cbit);\n    mask = (limb_t)0 - (cbit^1);\n\n    num /= sizeof(limb_t);\n\n    for (i = 0; i < num; i++)\n        rp[i] &= mask;\n}\n\n/*\n * Some compilers get arguably overzealous(*) when passing pointer to\n * multi-dimensional array [such as vec384x] as 'const' argument.\n * General direction seems to be to legitimize such constification,\n * so it's argued that suppressing the warning is appropriate.\n *\n * (*)  http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1923.htm\n */\n#if 
defined(__INTEL_COMPILER)\n# pragma warning(disable:167)\n# pragma warning(disable:556)\n#elif defined(__GNUC__) && !defined(__clang__) && (__STDC_VERSION__-0) < 202311\n# pragma GCC diagnostic ignored \"-Wpedantic\"\n#elif defined(_MSC_VER)\n# pragma warning(disable: 4127 4189)\n#endif\n\n#if !defined(__wasm__) && __STDC_HOSTED__-0 != 0\n# include <stdlib.h>\n#endif\n\n#if defined(__GNUC__)\n# ifndef alloca\n#  define alloca(s) __builtin_alloca(s)\n# endif\n#elif defined(__sun)\n# include <alloca.h>\n#elif defined(_WIN32)\n# include <malloc.h>\n# ifndef alloca\n#  define alloca(s) _alloca(s)\n# endif\n#endif\n\n#endif /* __BLS12_381_ASM_VECT_H__ */\n"
  }
]